1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
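/* Keeping the last-emitted values lets the output code skip re-printing an
   identical .arch or .fpu directive, e.g. when successive functions' target
   attributes resolve to the same architecture and FPU.  */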
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
 660 /* The minimum is set such that the total size of the block
 661    for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
 662    divisible by eight, ensuring natural spacing of anchors.  */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
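/* With -mrestrict-it (the default for ARMv8-A, where IT blocks covering more
   than one instruction are deprecated) only a single conditional instruction
   is allowed per IT block; otherwise the architectural maximum is four.  */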
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
 917 /* Nonzero if tuning for XScale.  */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack, it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
 947 /* Nonzero if we should use Neon to handle 64-bit operations rather
 948    than core registers.  */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency, multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
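/* In other words, the Thumb-2 work registers are drawn from the low
   registers r0-r7, excluding whichever of the hard frame pointer, stack
   pointer, program counter and PIC register fall inside that range.  */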
1005 \f
1006 /* Initialization code. */
1007
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
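/* These macros fill in the prefetch tuning fields ({num_slots, l1_cache_size,
   l1_cache_line_size}) of the tune_params structures defined later in this
   file; -1 means the value is not known, in which case the generic parameter
   defaults are left in place.  */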
1022
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027   1,					/* scalar_load_cost.  */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
1042
1043
1044
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* Addressing mode costs. */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2260 single cycle to execute, so materialising a full constant that way costs two
2261 cycles. An LDR from the constant pool likewise takes two cycles, but mildly
2262 increases pipelining opportunity (consecutive loads/stores can be pipelined
2263 together, saving one cycle), and may also improve icache utilisation. Hence
2264 we prefer the constant pool for such processors. */
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
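/* For illustration (an assumed example, not taken from this file): with
   -march=armv8-a the PROFILE placeholder is rewritten so that the buffer
   ends up holding "__ARM_ARCH_8A__". */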
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
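/* The loop below relies on the classic VALUE &= VALUE - 1 step, which clears
   one set bit per iteration; e.g. 0b101100 -> 0b101000 -> 0b100000 -> 0,
   giving a count of 3 (worked example added for illustration). */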
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting fixed-point library libfuncs. */
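/* For example (an illustrative call, mirroring the loops in arm_init_libfuncs):
   arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" built by the sprintf below. */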
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
2448
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
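/* E.g. a QQmode -> HQmode conversion (both signed fract modes) takes the
   suffix and is named "__gnu_fractqqhq2", whereas QQmode -> SImode does not
   and is named "__gnu_fractqqsi" (example names shown for illustration). */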
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
2468
2469 /* Set up library functions unique to ARM. */
2470
2471 static void
2472 arm_init_libfuncs (void)
2473 {
2474 /* For Linux, we have access to kernel support for atomic operations. */
2475 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2476 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2477
2478 /* There are no special library functions unless we are using the
2479 ARM BPABI. */
2480 if (!TARGET_BPABI)
2481 return;
2482
2483 /* The functions below are described in Section 4 of the "Run-Time
2484 ABI for the ARM architecture", Version 1.0. */
2485
2486 /* Double-precision floating-point arithmetic. Table 2. */
2487 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2488 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2489 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2490 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2491 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2492
2493 /* Double-precision comparisons. Table 3. */
2494 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2495 set_optab_libfunc (ne_optab, DFmode, NULL);
2496 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2497 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2498 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2499 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2500 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2501
2502 /* Single-precision floating-point arithmetic. Table 4. */
2503 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2504 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2505 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2506 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2507 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2508
2509 /* Single-precision comparisons. Table 5. */
2510 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2511 set_optab_libfunc (ne_optab, SFmode, NULL);
2512 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2513 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2514 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2515 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2516 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2517
2518 /* Floating-point to integer conversions. Table 6. */
2519 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2520 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2521 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2522 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2523 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2524 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2525 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2526 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2527
2528 /* Conversions between floating types. Table 7. */
2529 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2530 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2531
2532 /* Integer to floating-point conversions. Table 8. */
2533 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2534 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2535 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2537 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2538 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2539 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2541
2542 /* Long long. Table 9. */
2543 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2544 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2545 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2546 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2547 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2548 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2549 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2550 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2551
2552 /* Integer (32/32->32) division. \S 4.3.1. */
2553 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2554 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2555
2556 /* The divmod functions are designed so that they can be used for
2557 plain division, even though they return both the quotient and the
2558 remainder. The quotient is returned in the usual location (i.e.,
2559 r0 for SImode, {r0, r1} for DImode), just as would be expected
2560 for an ordinary division routine. Because the AAPCS calling
2561 conventions specify that all of { r0, r1, r2, r3 } are
2562 call-clobbered registers, there is no need to tell the compiler
2563 explicitly that those registers are clobbered by these
2564 routines. */
2565 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2566 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2567
2568 /* For SImode division the ABI provides div-without-mod routines,
2569 which are faster. */
2570 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2571 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2572
2573 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2574 divmod libcalls instead. */
2575 set_optab_libfunc (smod_optab, DImode, NULL);
2576 set_optab_libfunc (umod_optab, DImode, NULL);
2577 set_optab_libfunc (smod_optab, SImode, NULL);
2578 set_optab_libfunc (umod_optab, SImode, NULL);
2579
2580 /* Half-precision float operations. The compiler handles all operations
2581 with NULL libfuncs by converting to SFmode. */
2582 switch (arm_fp16_format)
2583 {
2584 case ARM_FP16_FORMAT_IEEE:
2585 case ARM_FP16_FORMAT_ALTERNATIVE:
2586
2587 /* Conversions. */
2588 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2589 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2590 ? "__gnu_f2h_ieee"
2591 : "__gnu_f2h_alternative"));
2592 set_conv_libfunc (sext_optab, SFmode, HFmode,
2593 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2594 ? "__gnu_h2f_ieee"
2595 : "__gnu_h2f_alternative"));
2596
2597 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2598 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2599 ? "__gnu_d2h_ieee"
2600 : "__gnu_d2h_alternative"));
2601
2602 /* Arithmetic. */
2603 set_optab_libfunc (add_optab, HFmode, NULL);
2604 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2605 set_optab_libfunc (smul_optab, HFmode, NULL);
2606 set_optab_libfunc (neg_optab, HFmode, NULL);
2607 set_optab_libfunc (sub_optab, HFmode, NULL);
2608
2609 /* Comparisons. */
2610 set_optab_libfunc (eq_optab, HFmode, NULL);
2611 set_optab_libfunc (ne_optab, HFmode, NULL);
2612 set_optab_libfunc (lt_optab, HFmode, NULL);
2613 set_optab_libfunc (le_optab, HFmode, NULL);
2614 set_optab_libfunc (ge_optab, HFmode, NULL);
2615 set_optab_libfunc (gt_optab, HFmode, NULL);
2616 set_optab_libfunc (unord_optab, HFmode, NULL);
2617 break;
2618
2619 default:
2620 break;
2621 }
2622
2623 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2624 {
2625 const arm_fixed_mode_set fixed_arith_modes[] =
2626 {
2627 { E_QQmode, "qq" },
2628 { E_UQQmode, "uqq" },
2629 { E_HQmode, "hq" },
2630 { E_UHQmode, "uhq" },
2631 { E_SQmode, "sq" },
2632 { E_USQmode, "usq" },
2633 { E_DQmode, "dq" },
2634 { E_UDQmode, "udq" },
2635 { E_TQmode, "tq" },
2636 { E_UTQmode, "utq" },
2637 { E_HAmode, "ha" },
2638 { E_UHAmode, "uha" },
2639 { E_SAmode, "sa" },
2640 { E_USAmode, "usa" },
2641 { E_DAmode, "da" },
2642 { E_UDAmode, "uda" },
2643 { E_TAmode, "ta" },
2644 { E_UTAmode, "uta" }
2645 };
2646 const arm_fixed_mode_set fixed_conv_modes[] =
2647 {
2648 { E_QQmode, "qq" },
2649 { E_UQQmode, "uqq" },
2650 { E_HQmode, "hq" },
2651 { E_UHQmode, "uhq" },
2652 { E_SQmode, "sq" },
2653 { E_USQmode, "usq" },
2654 { E_DQmode, "dq" },
2655 { E_UDQmode, "udq" },
2656 { E_TQmode, "tq" },
2657 { E_UTQmode, "utq" },
2658 { E_HAmode, "ha" },
2659 { E_UHAmode, "uha" },
2660 { E_SAmode, "sa" },
2661 { E_USAmode, "usa" },
2662 { E_DAmode, "da" },
2663 { E_UDAmode, "uda" },
2664 { E_TAmode, "ta" },
2665 { E_UTAmode, "uta" },
2666 { E_QImode, "qi" },
2667 { E_HImode, "hi" },
2668 { E_SImode, "si" },
2669 { E_DImode, "di" },
2670 { E_TImode, "ti" },
2671 { E_SFmode, "sf" },
2672 { E_DFmode, "df" }
2673 };
2674 unsigned int i, j;
2675
2676 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2677 {
2678 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2679 "add", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2681 "ssadd", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2683 "usadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2685 "sub", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2687 "sssub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2689 "ussub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2691 "mul", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2693 "ssmul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2695 "usmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2697 "div", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2699 "udiv", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2701 "ssdiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2703 "usdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2705 "neg", fixed_arith_modes[i].name, 2);
2706 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2707 "ssneg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2709 "usneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2711 "ashl", fixed_arith_modes[i].name, 3);
2712 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2713 "ashr", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2715 "lshr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2717 "ssashl", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2719 "usashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2721 "cmp", fixed_arith_modes[i].name, 2);
2722 }
2723
2724 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2725 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2726 {
2727 if (i == j
2728 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2729 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2730 continue;
2731
2732 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2733 fixed_conv_modes[j].mode, "fract",
2734 fixed_conv_modes[i].name,
2735 fixed_conv_modes[j].name);
2736 arm_set_fixed_conv_libfunc (satfract_optab,
2737 fixed_conv_modes[i].mode,
2738 fixed_conv_modes[j].mode, "satfract",
2739 fixed_conv_modes[i].name,
2740 fixed_conv_modes[j].name);
2741 arm_set_fixed_conv_libfunc (fractuns_optab,
2742 fixed_conv_modes[i].mode,
2743 fixed_conv_modes[j].mode, "fractuns",
2744 fixed_conv_modes[i].name,
2745 fixed_conv_modes[j].name);
2746 arm_set_fixed_conv_libfunc (satfractuns_optab,
2747 fixed_conv_modes[i].mode,
2748 fixed_conv_modes[j].mode, "satfractuns",
2749 fixed_conv_modes[i].name,
2750 fixed_conv_modes[j].name);
2751 }
2752 }
2753
2754 if (TARGET_AAPCS_BASED)
2755 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2756 }
2757
2758 /* On AAPCS systems, this is the "struct __va_list". */
2759 static GTY(()) tree va_list_type;
2760
2761 /* Return the type to use as __builtin_va_list. */
2762 static tree
2763 arm_build_builtin_va_list (void)
2764 {
2765 tree va_list_name;
2766 tree ap_field;
2767
2768 if (!TARGET_AAPCS_BASED)
2769 return std_build_builtin_va_list ();
2770
2771 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2772 defined as:
2773
2774 struct __va_list
2775 {
2776 void *__ap;
2777 };
2778
2779 The C Library ABI further reinforces this definition in \S
2780 4.1.
2781
2782 We must follow this definition exactly. The structure tag
2783 name is visible in C++ mangled names, and thus forms a part
2784 of the ABI. The field name may be used by people who
2785 #include <stdarg.h>. */
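/* (For instance, a C++ function taking a va_list argument is mangled with
   "St9__va_list" on AAPCS targets, so changing the tag would change the
   mangled name.  Example added for illustration.) */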
2786 /* Create the type. */
2787 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2788 /* Give it the required name. */
2789 va_list_name = build_decl (BUILTINS_LOCATION,
2790 TYPE_DECL,
2791 get_identifier ("__va_list"),
2792 va_list_type);
2793 DECL_ARTIFICIAL (va_list_name) = 1;
2794 TYPE_NAME (va_list_type) = va_list_name;
2795 TYPE_STUB_DECL (va_list_type) = va_list_name;
2796 /* Create the __ap field. */
2797 ap_field = build_decl (BUILTINS_LOCATION,
2798 FIELD_DECL,
2799 get_identifier ("__ap"),
2800 ptr_type_node);
2801 DECL_ARTIFICIAL (ap_field) = 1;
2802 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2803 TYPE_FIELDS (va_list_type) = ap_field;
2804 /* Compute its layout. */
2805 layout_type (va_list_type);
2806
2807 return va_list_type;
2808 }
2809
2810 /* Return an expression of type "void *" pointing to the next
2811 available argument in a variable-argument list. VALIST is the
2812 user-level va_list object, of type __builtin_va_list. */
2813 static tree
2814 arm_extract_valist_ptr (tree valist)
2815 {
2816 if (TREE_TYPE (valist) == error_mark_node)
2817 return error_mark_node;
2818
2819 /* On an AAPCS target, the pointer is stored within "struct
2820 va_list". */
2821 if (TARGET_AAPCS_BASED)
2822 {
2823 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2824 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2825 valist, ap_field, NULL_TREE);
2826 }
2827
2828 return valist;
2829 }
2830
2831 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2832 static void
2833 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2834 {
2835 valist = arm_extract_valist_ptr (valist);
2836 std_expand_builtin_va_start (valist, nextarg);
2837 }
2838
2839 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2840 static tree
2841 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2842 gimple_seq *post_p)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2846 }
2847
2848 /* Check any incompatible options that the user has specified. */
2849 static void
2850 arm_option_check_internal (struct gcc_options *opts)
2851 {
2852 int flags = opts->x_target_flags;
2853
2854 /* iWMMXt and NEON are incompatible. */
2855 if (TARGET_IWMMXT
2856 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2857 error ("iWMMXt and NEON are incompatible");
2858
2859 /* Make sure that the processor choice does not conflict with any of the
2860 other command line choices. */
2861 if (TARGET_ARM_P (flags)
2862 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2863 error ("target CPU does not support ARM mode");
2864
2865 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2866 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2867 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2868
2869 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2870 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2871
2872 /* If this target is normally configured to use APCS frames, warn if they
2873 are turned off and debugging is turned on. */
2874 if (TARGET_ARM_P (flags)
2875 && write_symbols != NO_DEBUG
2876 && !TARGET_APCS_FRAME
2877 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2878 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879
2880 /* iWMMXt unsupported under Thumb mode. */
2881 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2882 error ("iWMMXt unsupported under Thumb mode");
2883
2884 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2885 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886
2887 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2888 {
2889 error ("RTP PIC is incompatible with Thumb");
2890 flag_pic = 0;
2891 }
2892
2893 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2894 with MOVT. */
2895 if ((target_pure_code || target_slow_flash_data)
2896 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900 error ("%s only supports non-pic code on M-profile targets with the "
2901 "MOVT instruction", flag);
2902 }
2903
2904 }
2905
2906 /* Recompute the global settings depending on target attribute options. */
2907
2908 static void
2909 arm_option_params_internal (void)
2910 {
2911 /* If we are not using the default (ARM mode) section anchor offset
2912 ranges, then set the correct ranges now. */
2913 if (TARGET_THUMB1)
2914 {
2915 /* Thumb-1 LDR instructions cannot have negative offsets.
2916 Permissible positive offset ranges are 5-bit (for byte loads),
2917 6-bit (for halfword loads), or 7-bit (for word loads).
2918 Empirical results suggest a 7-bit anchor range gives the best
2919 overall code size. */
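/* (Those widths follow from the Thumb-1 LDR encodings, which scale a 5-bit
   immediate by the access size: byte offsets reach 31, halfword offsets 62
   and word offsets 124.  Note added for clarification.) */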
2920 targetm.min_anchor_offset = 0;
2921 targetm.max_anchor_offset = 127;
2922 }
2923 else if (TARGET_THUMB2)
2924 {
2925 /* The minimum is set such that the total size of the block
2926 for a particular anchor is 248 + 1 + 4095 bytes, which is
2927 divisible by eight, ensuring natural spacing of anchors. */
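/* (That is, offsets -248 through 4095 inclusive: 248 + 1 + 4095 = 4344
   = 8 * 543 bytes.  Arithmetic spelled out for clarity.) */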
2928 targetm.min_anchor_offset = -248;
2929 targetm.max_anchor_offset = 4095;
2930 }
2931 else
2932 {
2933 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2934 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2935 }
2936
2937 /* With -Os, allow up to 4 instructions in a conditional sequence regardless of the tuning default. */
2938 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2939
2940 /* For THUMB2, we limit the conditional sequence to one IT block. */
2941 if (TARGET_THUMB2)
2942 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2943 }
2944
2945 /* True if -mflip-thumb should next add an attribute for the default
2946 mode, false if it should next add an attribute for the opposite mode. */
2947 static GTY(()) bool thumb_flipper;
2948
2949 /* Options after initial target override. */
2950 static GTY(()) tree init_optimize;
2951
2952 static void
2953 arm_override_options_after_change_1 (struct gcc_options *opts)
2954 {
2955 if (opts->x_align_functions <= 0)
2956 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2957 && opts->x_optimize_size ? 2 : 4;
2958 }
2959
2960 /* Implement targetm.override_options_after_change. */
2961
2962 static void
2963 arm_override_options_after_change (void)
2964 {
2965 arm_configure_build_target (&arm_active_target,
2966 TREE_TARGET_OPTION (target_option_default_node),
2967 &global_options_set, false);
2968
2969 arm_override_options_after_change_1 (&global_options);
2970 }
2971
2972 /* Implement TARGET_OPTION_SAVE. */
2973 static void
2974 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2975 {
2976 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2977 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2978 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2979 }
2980
2981 /* Implement TARGET_OPTION_RESTORE. */
2982 static void
2983 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2984 {
2985 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2986 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2987 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2988 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2989 false);
2990 }
2991
2992 /* Reset options between modes that the user has specified. */
2993 static void
2994 arm_option_override_internal (struct gcc_options *opts,
2995 struct gcc_options *opts_set)
2996 {
2997 arm_override_options_after_change_1 (opts);
2998
2999 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3000 {
3001 /* The default is to enable interworking, so this warning message would
3002 be confusing to users who have just compiled with
3003 eg, -march=armv4. */
3004 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3005 opts->x_target_flags &= ~MASK_INTERWORK;
3006 }
3007
3008 if (TARGET_THUMB_P (opts->x_target_flags)
3009 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3010 {
3011 warning (0, "target CPU does not support THUMB instructions");
3012 opts->x_target_flags &= ~MASK_THUMB;
3013 }
3014
3015 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3016 {
3017 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3018 opts->x_target_flags &= ~MASK_APCS_FRAME;
3019 }
3020
3021 /* Callee super interworking implies thumb interworking. Adding
3022 this to the flags here simplifies the logic elsewhere. */
3023 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3024 opts->x_target_flags |= MASK_INTERWORK;
3025
3026 /* Need to remember initial values so combinations of options like
3027 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3028 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3029
3030 if (! opts_set->x_arm_restrict_it)
3031 opts->x_arm_restrict_it = arm_arch8;
3032
3033 /* ARM execution state and M profile don't have [restrict] IT. */
3034 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3035 opts->x_arm_restrict_it = 0;
3036
3037 /* Enable -munaligned-access by default for
3038 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3039 i.e. Thumb2 and ARM state only.
3040 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3041 - ARMv8 architecture-based processors. */
3042
3043 Disable -munaligned-access by default for
3044 - all pre-ARMv6 architecture-based processors
3045 - ARMv6-M architecture-based processors
3046 - ARMv8-M Baseline processors. */
3047
3048 if (! opts_set->x_unaligned_access)
3049 {
3050 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3051 && arm_arch6 && (arm_arch_notm || arm_arch7));
3052 }
3053 else if (opts->x_unaligned_access == 1
3054 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3055 {
3056 warning (0, "target CPU does not support unaligned accesses");
3057 opts->x_unaligned_access = 0;
3058 }
3059
3060 /* Don't warn about disabling insn scheduling here, since -fschedule-insns is on by default at -O2. */
3061 if (TARGET_THUMB1_P (opts->x_target_flags))
3062 opts->x_flag_schedule_insns = 0;
3063 else
3064 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3065
3066 /* Disable shrink-wrap when optimizing function for size, since it tends to
3067 generate additional returns. */
3068 if (optimize_function_for_size_p (cfun)
3069 && TARGET_THUMB2_P (opts->x_target_flags))
3070 opts->x_flag_shrink_wrap = false;
3071 else
3072 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3073
3074 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3075 - epilogue_insns - does not accurately model the corresponding insns
3076 emitted in the asm file. In particular, see the comment in thumb_exit
3077 'Find out how many of the (return) argument registers we can corrupt'.
3078 As a consequence, the epilogue may clobber registers without fipa-ra
3079 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3080 TODO: Accurately model clobbers for epilogue_insns and reenable
3081 fipa-ra. */
3082 if (TARGET_THUMB1_P (opts->x_target_flags))
3083 opts->x_flag_ipa_ra = 0;
3084 else
3085 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3086
3087 /* Thumb2 inline assembly code should always use unified syntax.
3088 This will apply to ARM and Thumb1 eventually. */
3089 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3090
3091 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3092 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3093 #endif
3094 }
3095
3096 static sbitmap isa_all_fpubits;
3097 static sbitmap isa_quirkbits;
3098
3099 /* Configure a build target TARGET from the user-specified options OPTS and
3100 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3101 architecture have been specified, but the two are not identical. */
3102 void
3103 arm_configure_build_target (struct arm_build_target *target,
3104 struct cl_target_option *opts,
3105 struct gcc_options *opts_set,
3106 bool warn_compatible)
3107 {
3108 const cpu_option *arm_selected_tune = NULL;
3109 const arch_option *arm_selected_arch = NULL;
3110 const cpu_option *arm_selected_cpu = NULL;
3111 const arm_fpu_desc *arm_selected_fpu = NULL;
3112 const char *tune_opts = NULL;
3113 const char *arch_opts = NULL;
3114 const char *cpu_opts = NULL;
3115
3116 bitmap_clear (target->isa);
3117 target->core_name = NULL;
3118 target->arch_name = NULL;
3119
3120 if (opts_set->x_arm_arch_string)
3121 {
3122 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3123 "-march",
3124 opts->x_arm_arch_string);
3125 arch_opts = strchr (opts->x_arm_arch_string, '+');
3126 }
3127
3128 if (opts_set->x_arm_cpu_string)
3129 {
3130 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3131 opts->x_arm_cpu_string);
3132 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3133 arm_selected_tune = arm_selected_cpu;
3134 /* If taking the tuning from -mcpu, we don't need to rescan the
3135 options for tuning. */
3136 }
3137
3138 if (opts_set->x_arm_tune_string)
3139 {
3140 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3141 opts->x_arm_tune_string);
3142 tune_opts = strchr (opts->x_arm_tune_string, '+');
3143 }
3144
3145 if (arm_selected_arch)
3146 {
3147 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3148 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3149 arch_opts);
3150
3151 if (arm_selected_cpu)
3152 {
3153 auto_sbitmap cpu_isa (isa_num_bits);
3154 auto_sbitmap isa_delta (isa_num_bits);
3155
3156 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3157 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3158 cpu_opts);
3159 bitmap_xor (isa_delta, cpu_isa, target->isa);
3160 /* Ignore any bits that are quirk bits. */
3161 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3162 /* Ignore (for now) any bits that might be set by -mfpu. */
3163 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3164
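/* For illustration (a hypothetical command line, not from this file):
   combining -mcpu=cortex-a8 with -march=armv7-m leaves bits set in
   isa_delta here, so the warning below is emitted. */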
3165 if (!bitmap_empty_p (isa_delta))
3166 {
3167 if (warn_compatible)
3168 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3169 arm_selected_cpu->common.name,
3170 arm_selected_arch->common.name);
3171 /* -march wins for code generation.
3172 -mcpu wins for default tuning. */
3173 if (!arm_selected_tune)
3174 arm_selected_tune = arm_selected_cpu;
3175
3176 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3177 target->arch_name = arm_selected_arch->common.name;
3178 }
3179 else
3180 {
3181 /* Architecture and CPU are essentially the same.
3182 Prefer the CPU setting. */
3183 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3184 target->core_name = arm_selected_cpu->common.name;
3185 /* Copy the CPU's capabilities, so that we inherit the
3186 appropriate extensions and quirks. */
3187 bitmap_copy (target->isa, cpu_isa);
3188 }
3189 }
3190 else
3191 {
3192 /* Pick a CPU based on the architecture. */
3193 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3194 target->arch_name = arm_selected_arch->common.name;
3195 /* Note: target->core_name is left unset in this path. */
3196 }
3197 }
3198 else if (arm_selected_cpu)
3199 {
3200 target->core_name = arm_selected_cpu->common.name;
3201 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3202 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3203 cpu_opts);
3204 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3205 }
3206 /* If the user did not specify a processor or architecture, choose
3207 one for them. */
3208 else
3209 {
3210 const cpu_option *sel;
3211 auto_sbitmap sought_isa (isa_num_bits);
3212 bitmap_clear (sought_isa);
3213 auto_sbitmap default_isa (isa_num_bits);
3214
3215 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3216 TARGET_CPU_DEFAULT);
3217 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3218 gcc_assert (arm_selected_cpu->common.name);
3219
3220 /* RWE: All of the selection logic below (to the end of this
3221 'if' clause) looks somewhat suspect. It appears to be mostly
3222 there to support forcing thumb support when the default CPU
3223 does not have thumb (somewhat dubious in terms of what the
3224 user might be expecting). I think it should be removed once
3225 support for the pre-thumb era cores is removed. */
3226 sel = arm_selected_cpu;
3227 arm_initialize_isa (default_isa, sel->common.isa_bits);
3228 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3229 cpu_opts);
3230
3231 /* Now check to see if the user has specified any command line
3232 switches that require certain abilities from the cpu. */
3233
3234 if (TARGET_INTERWORK || TARGET_THUMB)
3235 {
3236 bitmap_set_bit (sought_isa, isa_bit_thumb);
3237
3238 /* There are no ARM processors that support both APCS-26 and
3239 interworking. Therefore we forcibly remove MODE26 from
3240 the isa features here (if it was set), so that the
3241 search below will always be able to find a compatible
3242 processor. */
3243 bitmap_clear_bit (default_isa, isa_bit_mode26);
3244 }
3245
3246 /* If there are such requirements and the default CPU does not
3247 satisfy them, we need to run over the complete list of
3248 cores looking for one that is satisfactory. */
3249 if (!bitmap_empty_p (sought_isa)
3250 && !bitmap_subset_p (sought_isa, default_isa))
3251 {
3252 auto_sbitmap candidate_isa (isa_num_bits);
3253 /* We're only interested in a CPU with at least the
3254 capabilities of the default CPU and the required
3255 additional features. */
3256 bitmap_ior (default_isa, default_isa, sought_isa);
3257
3258 /* Try to locate a CPU type that supports all of the abilities
3259 of the default CPU, plus the extra abilities requested by
3260 the user. */
3261 for (sel = all_cores; sel->common.name != NULL; sel++)
3262 {
3263 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3264 /* An exact match? */
3265 if (bitmap_equal_p (default_isa, candidate_isa))
3266 break;
3267 }
3268
3269 if (sel->common.name == NULL)
3270 {
3271 unsigned current_bit_count = isa_num_bits;
3272 const cpu_option *best_fit = NULL;
3273
3274 /* Ideally we would like to issue an error message here
3275 saying that it was not possible to find a CPU compatible
3276 with the default CPU, but which also supports the command
3277 line options specified by the programmer, and so they
3278 ought to use the -mcpu=<name> command line option to
3279 override the default CPU type.
3280
3281 If we cannot find a CPU that has exactly the
3282 characteristics of the default CPU and the given
3283 command line options we scan the array again looking
3284 for a best match. The best match must have at least
3285 the capabilities of the perfect match. */
3286 for (sel = all_cores; sel->common.name != NULL; sel++)
3287 {
3288 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3289
3290 if (bitmap_subset_p (default_isa, candidate_isa))
3291 {
3292 unsigned count;
3293
3294 bitmap_and_compl (candidate_isa, candidate_isa,
3295 default_isa);
3296 count = bitmap_popcount (candidate_isa);
3297
3298 if (count < current_bit_count)
3299 {
3300 best_fit = sel;
3301 current_bit_count = count;
3302 }
3303 }
3304 }
3305
3306 gcc_assert (best_fit);
3307 sel = best_fit;
3308 }
3309 arm_selected_cpu = sel;
3310 }
3311
3312 /* Now we know the CPU, we can finally initialize the target
3313 structure. */
3314 target->core_name = arm_selected_cpu->common.name;
3315 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3316 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3317 cpu_opts);
3318 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3319 }
3320
3321 gcc_assert (arm_selected_cpu);
3322 gcc_assert (arm_selected_arch);
3323
3324 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3325 {
3326 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3327 auto_sbitmap fpu_bits (isa_num_bits);
3328
3329 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3330 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3331 bitmap_ior (target->isa, target->isa, fpu_bits);
3332 }
3333
3334 if (!arm_selected_tune)
3335 arm_selected_tune = arm_selected_cpu;
3336 else /* Validate the features passed to -mtune. */
3337 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3338
3339 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3340
3341 /* Finish initializing the target structure. */
3342 target->arch_pp_name = arm_selected_arch->arch;
3343 target->base_arch = arm_selected_arch->base_arch;
3344 target->profile = arm_selected_arch->profile;
3345
3346 target->tune_flags = tune_data->tune_flags;
3347 target->tune = tune_data->tune;
3348 target->tune_core = tune_data->scheduler;
3349 arm_option_reconfigure_globals ();
3350 }
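/* For illustration only (a hypothetical invocation, not taken from the
   sources): given

     arm-none-eabi-gcc -march=armv7-a -mcpu=cortex-m4 foo.c

   the ISA delta between the two selections is non-empty, so the function
   above warns (when WARN_COMPATIBLE), generates code for armv7-a and keeps
   cortex-m4 only as the default tuning target.  With a compatible pair such
   as -march=armv7-a -mcpu=cortex-a9, the CPU setting is preferred and its
   extensions and quirks are inherited.  */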
3351
3352 /* Fix up any incompatible options that the user has specified. */
3353 static void
3354 arm_option_override (void)
3355 {
3356 static const enum isa_feature fpu_bitlist[]
3357 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3358 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3359 cl_target_option opts;
3360
3361 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3362 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3363
3364 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3365 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3366
3367 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3368
3369 if (!global_options_set.x_arm_fpu_index)
3370 {
3371 bool ok;
3372 int fpu_index;
3373
3374 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3375 CL_TARGET);
3376 gcc_assert (ok);
3377 arm_fpu_index = (enum fpu_type) fpu_index;
3378 }
3379
3380 cl_target_option_save (&opts, &global_options);
3381 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3382 true);
3383
3384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3385 SUBTARGET_OVERRIDE_OPTIONS;
3386 #endif
3387
3388 /* Initialize boolean versions of the architectural flags, for use
3389 in the arm.md file and for enabling feature flags. */
3390 arm_option_reconfigure_globals ();
3391
3392 arm_tune = arm_active_target.tune_core;
3393 tune_flags = arm_active_target.tune_flags;
3394 current_tune = arm_active_target.tune;
3395
3396 /* TBD: Dwarf info for apcs frame is not handled yet. */
3397 if (TARGET_APCS_FRAME)
3398 flag_shrink_wrap = false;
3399
3400 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3401 {
3402 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3403 target_flags |= MASK_APCS_FRAME;
3404 }
3405
3406 if (TARGET_POKE_FUNCTION_NAME)
3407 target_flags |= MASK_APCS_FRAME;
3408
3409 if (TARGET_APCS_REENT && flag_pic)
3410 error ("-fpic and -mapcs-reent are incompatible");
3411
3412 if (TARGET_APCS_REENT)
3413 warning (0, "APCS reentrant code not supported. Ignored");
3414
3415 /* Set up some tuning parameters. */
3416 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3417 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3418 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3419 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3420 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3421 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3422
3423 /* For arm2/3 there is no need to do any scheduling if we are doing
3424 software floating-point. */
3425 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3426 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3427
3428 /* Override the default structure alignment for AAPCS ABI. */
3429 if (!global_options_set.x_arm_structure_size_boundary)
3430 {
3431 if (TARGET_AAPCS_BASED)
3432 arm_structure_size_boundary = 8;
3433 }
3434 else
3435 {
3436 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3437
3438 if (arm_structure_size_boundary != 8
3439 && arm_structure_size_boundary != 32
3440 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3441 {
3442 if (ARM_DOUBLEWORD_ALIGN)
3443 warning (0,
3444 "structure size boundary can only be set to 8, 32 or 64");
3445 else
3446 warning (0, "structure size boundary can only be set to 8 or 32");
3447 arm_structure_size_boundary
3448 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3449 }
3450 }
3451
3452 if (TARGET_VXWORKS_RTP)
3453 {
3454 if (!global_options_set.x_arm_pic_data_is_text_relative)
3455 arm_pic_data_is_text_relative = 0;
3456 }
3457 else if (flag_pic
3458 && !arm_pic_data_is_text_relative
3459 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3460 /* When text & data segments don't have a fixed displacement, the
3461 intended use is with a single, read only, pic base register.
3462 Unless the user explicitly requested not to do that, set
3463 it. */
3464 target_flags |= MASK_SINGLE_PIC_BASE;
3465
3466 /* If stack checking is disabled, we can use r10 as the PIC register,
3467 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3468 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3469 {
3470 if (TARGET_VXWORKS_RTP)
3471 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3472 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3473 }
3474
3475 if (flag_pic && TARGET_VXWORKS_RTP)
3476 arm_pic_register = 9;
3477
3478 if (arm_pic_register_string != NULL)
3479 {
3480 int pic_register = decode_reg_name (arm_pic_register_string);
3481
3482 if (!flag_pic)
3483 warning (0, "-mpic-register= is useless without -fpic");
3484
3485 /* Prevent the user from choosing an obviously stupid PIC register. */
3486 else if (pic_register < 0 || call_used_regs[pic_register]
3487 || pic_register == HARD_FRAME_POINTER_REGNUM
3488 || pic_register == STACK_POINTER_REGNUM
3489 || pic_register >= PC_REGNUM
3490 || (TARGET_VXWORKS_RTP
3491 && (unsigned int) pic_register != arm_pic_register))
3492 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3493 else
3494 arm_pic_register = pic_register;
3495 }
3496
3497 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3498 if (fix_cm3_ldrd == 2)
3499 {
3500 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3501 fix_cm3_ldrd = 1;
3502 else
3503 fix_cm3_ldrd = 0;
3504 }
3505
3506 /* Hot/Cold partitioning is not currently supported, since we can't
3507 handle literal pool placement in that case. */
3508 if (flag_reorder_blocks_and_partition)
3509 {
3510 inform (input_location,
3511 "-freorder-blocks-and-partition not supported on this architecture");
3512 flag_reorder_blocks_and_partition = 0;
3513 flag_reorder_blocks = 1;
3514 }
3515
3516 if (flag_pic)
3517 /* Hoisting PIC address calculations more aggressively provides a small,
3518 but measurable, size reduction for PIC code. Therefore, we decrease
3519 the bar for unrestricted expression hoisting to the cost of PIC address
3520 calculation, which is 2 instructions. */
3521 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3522 global_options.x_param_values,
3523 global_options_set.x_param_values);
3524
3525 /* ARM EABI defaults to strict volatile bitfields. */
3526 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3527 && abi_version_at_least(2))
3528 flag_strict_volatile_bitfields = 1;
3529
3530 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3531 have deemed it beneficial (signified by setting
3532 prefetch.num_slots to 1 or more). */
3533 if (flag_prefetch_loop_arrays < 0
3534 && HAVE_prefetch
3535 && optimize >= 3
3536 && current_tune->prefetch.num_slots > 0)
3537 flag_prefetch_loop_arrays = 1;
3538
3539 /* Set up parameters to be used in prefetching algorithm. Do not
3540 override the defaults unless we are tuning for a core we have
3541 researched values for. */
3542 if (current_tune->prefetch.num_slots > 0)
3543 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3544 current_tune->prefetch.num_slots,
3545 global_options.x_param_values,
3546 global_options_set.x_param_values);
3547 if (current_tune->prefetch.l1_cache_line_size >= 0)
3548 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3549 current_tune->prefetch.l1_cache_line_size,
3550 global_options.x_param_values,
3551 global_options_set.x_param_values);
3552 if (current_tune->prefetch.l1_cache_size >= 0)
3553 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3554 current_tune->prefetch.l1_cache_size,
3555 global_options.x_param_values,
3556 global_options_set.x_param_values);
3557
3558 /* Use Neon rather than core registers to perform 64-bit
3559 operations. */
3560 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3561 if (use_neon_for_64bits == 1)
3562 prefer_neon_for_64bits = true;
3563
3564 /* Use the alternative scheduling-pressure algorithm by default. */
3565 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3566 global_options.x_param_values,
3567 global_options_set.x_param_values);
3568
3569 /* Look through ready list and all of queue for instructions
3570 relevant for L2 auto-prefetcher. */
3571 int param_sched_autopref_queue_depth;
3572
3573 switch (current_tune->sched_autopref)
3574 {
3575 case tune_params::SCHED_AUTOPREF_OFF:
3576 param_sched_autopref_queue_depth = -1;
3577 break;
3578
3579 case tune_params::SCHED_AUTOPREF_RANK:
3580 param_sched_autopref_queue_depth = 0;
3581 break;
3582
3583 case tune_params::SCHED_AUTOPREF_FULL:
3584 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3585 break;
3586
3587 default:
3588 gcc_unreachable ();
3589 }
3590
3591 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3592 param_sched_autopref_queue_depth,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3595
3596 /* Currently, for slow flash data, we just disable literal pools. We also
3597 disable them for pure-code. */
3598 if (target_slow_flash_data || target_pure_code)
3599 arm_disable_literal_pool = true;
3600
3601 /* Disable scheduling fusion by default if the target is not an ARMv7
3602 processor or does not prefer ldrd/strd. */
3603 if (flag_schedule_fusion == 2
3604 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3605 flag_schedule_fusion = 0;
3606
3607 /* Need to remember initial options before they are overridden. */
3608 init_optimize = build_optimization_node (&global_options);
3609
3610 arm_options_perform_arch_sanity_checks ();
3611 arm_option_override_internal (&global_options, &global_options_set);
3612 arm_option_check_internal (&global_options);
3613 arm_option_params_internal ();
3614
3615 /* Create the default target_options structure. */
3616 target_option_default_node = target_option_current_node
3617 = build_target_option_node (&global_options);
3618
3619 /* Register global variables with the garbage collector. */
3620 arm_add_gc_roots ();
3621
3622 /* Init initial mode for testing. */
3623 thumb_flipper = TARGET_THUMB;
3624 }
3625
3626
3627 /* Reconfigure global status flags from the active_target.isa. */
3628 void
3629 arm_option_reconfigure_globals (void)
3630 {
3631 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3632 arm_base_arch = arm_active_target.base_arch;
3633
3634 /* Initialize boolean versions of the architectural flags, for use
3635 in the arm.md file. */
3636 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3637 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3638 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3639 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3640 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3641 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3642 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3643 arm_arch6m = arm_arch6 && !arm_arch_notm;
3644 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3645 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3646 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3647 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3648 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3649 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3650 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3651 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3652 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3653 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3654 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3655 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3656 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3657 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3658 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3659 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3660 if (arm_fp16_inst)
3661 {
3662 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3663 error ("selected fp16 options are incompatible");
3664 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3665 }
3666
3667 /* And finally, set up some quirks. */
3668 arm_arch_no_volatile_ce
3669 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3670 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3671 isa_bit_quirk_armv6kz);
3672
3673 /* Use the cp15 method if it is available. */
3674 if (target_thread_pointer == TP_AUTO)
3675 {
3676 if (arm_arch6k && !TARGET_THUMB1)
3677 target_thread_pointer = TP_CP15;
3678 else
3679 target_thread_pointer = TP_SOFT;
3680 }
3681 }
3682
3683 /* Perform some validation between the desired architecture and the rest of the
3684 options. */
3685 void
3686 arm_options_perform_arch_sanity_checks (void)
3687 {
3688 /* V5T code we generate is completely interworking capable, so we turn off
3689 TARGET_INTERWORK here to avoid many tests later on. */
3690
3691 /* XXX However, we must pass the right pre-processor defines to CPP
3692 or GLD can get confused. This is a hack. */
3693 if (TARGET_INTERWORK)
3694 arm_cpp_interwork = 1;
3695
3696 if (arm_arch5t)
3697 target_flags &= ~MASK_INTERWORK;
3698
3699 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3700 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3701
3702 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3703 error ("iwmmxt abi requires an iwmmxt capable cpu");
3704
3705 /* BPABI targets use linker tricks to allow interworking on cores
3706 without thumb support. */
3707 if (TARGET_INTERWORK
3708 && !TARGET_BPABI
3709 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3710 {
3711 warning (0, "target CPU does not support interworking");
3712 target_flags &= ~MASK_INTERWORK;
3713 }
3714
3715 /* If soft-float is specified then don't use FPU. */
3716 if (TARGET_SOFT_FLOAT)
3717 arm_fpu_attr = FPU_NONE;
3718 else
3719 arm_fpu_attr = FPU_VFP;
3720
3721 if (TARGET_AAPCS_BASED)
3722 {
3723 if (TARGET_CALLER_INTERWORKING)
3724 error ("AAPCS does not support -mcaller-super-interworking");
3725 else
3726 if (TARGET_CALLEE_INTERWORKING)
3727 error ("AAPCS does not support -mcallee-super-interworking");
3728 }
3729
3730 /* __fp16 support currently assumes the core has ldrh. */
3731 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3732 sorry ("__fp16 and no ldrh");
3733
3734 if (use_cmse && !arm_arch_cmse)
3735 error ("target CPU does not support ARMv8-M Security Extensions");
3736
3737 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3738 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3739 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3740 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3741
3742
3743 if (TARGET_AAPCS_BASED)
3744 {
3745 if (arm_abi == ARM_ABI_IWMMXT)
3746 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3747 else if (TARGET_HARD_FLOAT_ABI)
3748 {
3749 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3750 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3751 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3752 }
3753 else
3754 arm_pcs_default = ARM_PCS_AAPCS;
3755 }
3756 else
3757 {
3758 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3759 sorry ("-mfloat-abi=hard and VFP");
3760
3761 if (arm_abi == ARM_ABI_APCS)
3762 arm_pcs_default = ARM_PCS_APCS;
3763 else
3764 arm_pcs_default = ARM_PCS_ATPCS;
3765 }
3766 }
3767
3768 static void
3769 arm_add_gc_roots (void)
3770 {
3771 gcc_obstack_init(&minipool_obstack);
3772 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3773 }
3774 \f
3775 /* A table of known ARM exception types.
3776 For use with the interrupt function attribute. */
3777
3778 typedef struct
3779 {
3780 const char *const arg;
3781 const unsigned long return_value;
3782 }
3783 isr_attribute_arg;
3784
3785 static const isr_attribute_arg isr_attribute_args [] =
3786 {
3787 { "IRQ", ARM_FT_ISR },
3788 { "irq", ARM_FT_ISR },
3789 { "FIQ", ARM_FT_FIQ },
3790 { "fiq", ARM_FT_FIQ },
3791 { "ABORT", ARM_FT_ISR },
3792 { "abort", ARM_FT_ISR },
3793 { "ABORT", ARM_FT_ISR },
3794 { "abort", ARM_FT_ISR },
3795 { "UNDEF", ARM_FT_EXCEPTION },
3796 { "undef", ARM_FT_EXCEPTION },
3797 { "SWI", ARM_FT_EXCEPTION },
3798 { "swi", ARM_FT_EXCEPTION },
3799 { NULL, ARM_FT_NORMAL }
3800 };
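/* For illustration, a hypothetical declaration using one of the strings
   accepted above (the function name is made up):

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   When the attribute is given with no argument, arm_isr_value below
   defaults to ARM_FT_ISR.  */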
3801
3802 /* Returns the (interrupt) function type of the current
3803 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3804
3805 static unsigned long
3806 arm_isr_value (tree argument)
3807 {
3808 const isr_attribute_arg * ptr;
3809 const char * arg;
3810
3811 if (!arm_arch_notm)
3812 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3813
3814 /* No argument - default to IRQ. */
3815 if (argument == NULL_TREE)
3816 return ARM_FT_ISR;
3817
3818 /* Get the value of the argument. */
3819 if (TREE_VALUE (argument) == NULL_TREE
3820 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3821 return ARM_FT_UNKNOWN;
3822
3823 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3824
3825 /* Check it against the list of known arguments. */
3826 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3827 if (streq (arg, ptr->arg))
3828 return ptr->return_value;
3829
3830 /* An unrecognized interrupt type. */
3831 return ARM_FT_UNKNOWN;
3832 }
3833
3834 /* Computes the type of the current function. */
3835
3836 static unsigned long
3837 arm_compute_func_type (void)
3838 {
3839 unsigned long type = ARM_FT_UNKNOWN;
3840 tree a;
3841 tree attr;
3842
3843 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3844
3845 /* Decide if the current function is volatile. Such functions
3846 never return, and many memory cycles can be saved by not storing
3847 register values that will never be needed again. This optimization
3848 was added to speed up context switching in a kernel application. */
3849 if (optimize > 0
3850 && (TREE_NOTHROW (current_function_decl)
3851 || !(flag_unwind_tables
3852 || (flag_exceptions
3853 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3854 && TREE_THIS_VOLATILE (current_function_decl))
3855 type |= ARM_FT_VOLATILE;
3856
3857 if (cfun->static_chain_decl != NULL)
3858 type |= ARM_FT_NESTED;
3859
3860 attr = DECL_ATTRIBUTES (current_function_decl);
3861
3862 a = lookup_attribute ("naked", attr);
3863 if (a != NULL_TREE)
3864 type |= ARM_FT_NAKED;
3865
3866 a = lookup_attribute ("isr", attr);
3867 if (a == NULL_TREE)
3868 a = lookup_attribute ("interrupt", attr);
3869
3870 if (a == NULL_TREE)
3871 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3872 else
3873 type |= arm_isr_value (TREE_VALUE (a));
3874
3875 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3876 type |= ARM_FT_CMSE_ENTRY;
3877
3878 return type;
3879 }
3880
3881 /* Returns the type of the current function. */
3882
3883 unsigned long
3884 arm_current_func_type (void)
3885 {
3886 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3887 cfun->machine->func_type = arm_compute_func_type ();
3888
3889 return cfun->machine->func_type;
3890 }
3891
3892 bool
3893 arm_allocate_stack_slots_for_args (void)
3894 {
3895 /* Naked functions should not allocate stack slots for arguments. */
3896 return !IS_NAKED (arm_current_func_type ());
3897 }
3898
3899 static bool
3900 arm_warn_func_return (tree decl)
3901 {
3902 /* Naked functions are implemented entirely in assembly, including the
3903 return sequence, so suppress warnings about this. */
3904 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3905 }
3906
3907 \f
3908 /* Output assembler code for a block containing the constant parts
3909 of a trampoline, leaving space for the variable parts.
3910
3911 On the ARM, (if r8 is the static chain regnum, and remembering that
3912 referencing pc adds an offset of 8) the trampoline looks like:
3913 ldr r8, [pc, #0]
3914 ldr pc, [pc]
3915 .word static chain value
3916 .word function's address
3917 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3918
3919 static void
3920 arm_asm_trampoline_template (FILE *f)
3921 {
3922 fprintf (f, "\t.syntax unified\n");
3923
3924 if (TARGET_ARM)
3925 {
3926 fprintf (f, "\t.arm\n");
3927 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3928 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3929 }
3930 else if (TARGET_THUMB2)
3931 {
3932 fprintf (f, "\t.thumb\n");
3933 /* The Thumb-2 trampoline is similar to the arm implementation.
3934 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3935 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3936 STATIC_CHAIN_REGNUM, PC_REGNUM);
3937 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3938 }
3939 else
3940 {
3941 ASM_OUTPUT_ALIGN (f, 2);
3942 fprintf (f, "\t.code\t16\n");
3943 fprintf (f, ".Ltrampoline_start:\n");
3944 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3945 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3946 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3947 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3948 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3949 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3950 }
3951 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3952 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3953 }
3954
3955 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3956
3957 static void
3958 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3959 {
3960 rtx fnaddr, mem, a_tramp;
3961
3962 emit_block_move (m_tramp, assemble_trampoline_template (),
3963 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3964
3965 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3966 emit_move_insn (mem, chain_value);
3967
3968 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3969 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3970 emit_move_insn (mem, fnaddr);
3971
3972 a_tramp = XEXP (m_tramp, 0);
3973 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3974 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3975 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3976 }
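/* For illustration, a sketch of the 32-bit (ARM/Thumb-2) trampoline the two
   functions above produce; offsets follow the adjust_address calls:

     offset  0:  first template insn  (loads the static chain register)
     offset  4:  second template insn (loads the target address into pc)
     offset  8:  static chain value   (filled in by arm_trampoline_init)
     offset 12:  function address     (filled in by arm_trampoline_init)

   arm_trampoline_init then calls __clear_cache over the whole block so the
   freshly written words are visible to instruction fetch.  */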
3977
3978 /* Thumb trampolines should be entered in thumb mode, so set
3979 the bottom bit of the address. */
3980
3981 static rtx
3982 arm_trampoline_adjust_address (rtx addr)
3983 {
3984 if (TARGET_THUMB)
3985 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3986 NULL, 0, OPTAB_LIB_WIDEN);
3987 return addr;
3988 }
3989 \f
3990 /* Return 1 if it is possible to return using a single instruction.
3991 If SIBLING is non-null, this is a test for a return before a sibling
3992 call. SIBLING is the call insn, so we can examine its register usage. */
3993
3994 int
3995 use_return_insn (int iscond, rtx sibling)
3996 {
3997 int regno;
3998 unsigned int func_type;
3999 unsigned long saved_int_regs;
4000 unsigned HOST_WIDE_INT stack_adjust;
4001 arm_stack_offsets *offsets;
4002
4003 /* Never use a return instruction before reload has run. */
4004 if (!reload_completed)
4005 return 0;
4006
4007 func_type = arm_current_func_type ();
4008
4009 /* Naked, volatile and stack alignment functions need special
4010 consideration. */
4011 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4012 return 0;
4013
4014 /* So do interrupt functions that use the frame pointer and Thumb
4015 interrupt functions. */
4016 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4017 return 0;
4018
4019 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4020 && !optimize_function_for_size_p (cfun))
4021 return 0;
4022
4023 offsets = arm_get_frame_offsets ();
4024 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4025
4026 /* As do variadic functions. */
4027 if (crtl->args.pretend_args_size
4028 || cfun->machine->uses_anonymous_args
4029 /* Or if the function calls __builtin_eh_return () */
4030 || crtl->calls_eh_return
4031 /* Or if the function calls alloca */
4032 || cfun->calls_alloca
4033 /* Or if there is a stack adjustment. However, if the stack pointer
4034 is saved on the stack, we can use a pre-incrementing stack load. */
4035 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4036 && stack_adjust == 4))
4037 /* Or if the static chain register was saved above the frame, under the
4038 assumption that the stack pointer isn't saved on the stack. */
4039 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4040 && arm_compute_static_chain_stack_bytes() != 0))
4041 return 0;
4042
4043 saved_int_regs = offsets->saved_regs_mask;
4044
4045 /* Unfortunately, the insn
4046
4047 ldmib sp, {..., sp, ...}
4048
4049 triggers a bug on most SA-110 based devices, such that the stack
4050 pointer won't be correctly restored if the instruction takes a
4051 page fault. We work around this problem by popping r3 along with
4052 the other registers, since that is never slower than executing
4053 another instruction.
4054
4055 We test for !arm_arch5t here, because code for any architecture
4056 less than this could potentially be run on one of the buggy
4057 chips. */
4058 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4059 {
4060 /* Validate that r3 is a call-clobbered register (always true in
4061 the default abi) ... */
4062 if (!call_used_regs[3])
4063 return 0;
4064
4065 /* ... that it isn't being used for a return value ... */
4066 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4067 return 0;
4068
4069 /* ... or for a tail-call argument ... */
4070 if (sibling)
4071 {
4072 gcc_assert (CALL_P (sibling));
4073
4074 if (find_regno_fusage (sibling, USE, 3))
4075 return 0;
4076 }
4077
4078 /* ... and that there are no call-saved registers in r0-r2
4079 (always true in the default ABI). */
4080 if (saved_int_regs & 0x7)
4081 return 0;
4082 }
4083
4084 /* Can't be done if interworking with Thumb, and any registers have been
4085 stacked. */
4086 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4087 return 0;
4088
4089 /* On StrongARM, conditional returns are expensive if they aren't
4090 taken and multiple registers have been stacked. */
4091 if (iscond && arm_tune_strongarm)
4092 {
4093 /* Conditional return when just the LR is stored is a simple
4094 conditional-load instruction, that's not expensive. */
4095 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4096 return 0;
4097
4098 if (flag_pic
4099 && arm_pic_register != INVALID_REGNUM
4100 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4101 return 0;
4102 }
4103
4104 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4105 several instructions if anything needs to be popped. */
4106 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4107 return 0;
4108
4109 /* If there are saved registers but the LR isn't saved, then we need
4110 two instructions for the return. */
4111 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4112 return 0;
4113
4114 /* Can't be done if any of the VFP regs are pushed,
4115 since this also requires an insn. */
4116 if (TARGET_HARD_FLOAT)
4117 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4118 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4119 return 0;
4120
4121 if (TARGET_REALLY_IWMMXT)
4122 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4123 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4124 return 0;
4125
4126 return 1;
4127 }
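/* For illustration (a hypothetical case, not from the sources): a small
   leaf function that saves no registers and needs no stack adjustment
   makes use_return_insn return 1, so its epilogue can be a single return
   instruction; a function that calls alloca, or saves registers without
   saving LR, returns 0 and gets a multi-instruction epilogue instead.  */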
4128
4129 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4130 shrink-wrapping if possible. This is the case if we need to emit a
4131 prologue, which we can test by looking at the offsets. */
4132 bool
4133 use_simple_return_p (void)
4134 {
4135 arm_stack_offsets *offsets;
4136
4137 /* Note this function can be called before or after reload. */
4138 if (!reload_completed)
4139 arm_compute_frame_layout ();
4140
4141 offsets = arm_get_frame_offsets ();
4142 return offsets->outgoing_args != 0;
4143 }
4144
4145 /* Return TRUE if int I is a valid immediate ARM constant. */
4146
4147 int
4148 const_ok_for_arm (HOST_WIDE_INT i)
4149 {
4150 int lowbit;
4151
4152 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4153 be all zero, or all one. */
4154 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4155 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4156 != ((~(unsigned HOST_WIDE_INT) 0)
4157 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4158 return FALSE;
4159
4160 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4161
4162 /* Fast return for 0 and small values. We must do this for zero, since
4163 the code below can't handle that one case. */
4164 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4165 return TRUE;
4166
4167 /* Get the number of trailing zeros. */
4168 lowbit = ffs((int) i) - 1;
4169
4170 /* Only even shifts are allowed in ARM mode so round down to the
4171 nearest even number. */
4172 if (TARGET_ARM)
4173 lowbit &= ~1;
4174
4175 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4176 return TRUE;
4177
4178 if (TARGET_ARM)
4179 {
4180 /* Allow rotated constants in ARM mode. */
4181 if (lowbit <= 4
4182 && ((i & ~0xc000003f) == 0
4183 || (i & ~0xf000000f) == 0
4184 || (i & ~0xfc000003) == 0))
4185 return TRUE;
4186 }
4187 else if (TARGET_THUMB2)
4188 {
4189 HOST_WIDE_INT v;
4190
4191 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4192 v = i & 0xff;
4193 v |= v << 16;
4194 if (i == v || i == (v | (v << 8)))
4195 return TRUE;
4196
4197 /* Allow repeated pattern 0xXY00XY00. */
4198 v = i & 0xff00;
4199 v |= v << 16;
4200 if (i == v)
4201 return TRUE;
4202 }
4203 else if (TARGET_HAVE_MOVT)
4204 {
4205 /* Thumb-1 Targets with MOVT. */
4206 if (i > 0xffff)
4207 return FALSE;
4208 else
4209 return TRUE;
4210 }
4211
4212 return FALSE;
4213 }
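/* For illustration, a minimal standalone sketch (not part of the compiler)
   of the ARM-mode rule const_ok_for_arm implements for 32-bit values: an
   8-bit constant rotated right by an even amount.

     static int is_arm_mode_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
         {
           // Rotate X left by ROT; if the result fits in 8 bits, then X
           // itself is an 8-bit value rotated right by ROT.
           unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
           if ((v & ~0xffu) == 0)
             return 1;
         }
       return 0;
     }

   e.g. 0x000000ff, 0x0000ff00 and 0xf000000f are representable, while
   0x00000101 is not (its two set bits never fit in one rotated 8-bit
   window).  Thumb-2 additionally accepts the replicated byte patterns
   handled in the function above.  */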
4214
4215 /* Return true if I is a valid constant for the operation CODE. */
4216 int
4217 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4218 {
4219 if (const_ok_for_arm (i))
4220 return 1;
4221
4222 switch (code)
4223 {
4224 case SET:
4225 /* See if we can use movw. */
4226 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4227 return 1;
4228 else
4229 /* Otherwise, try mvn. */
4230 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4231
4232 case PLUS:
4233 /* See if we can use addw or subw. */
4234 if (TARGET_THUMB2
4235 && ((i & 0xfffff000) == 0
4236 || ((-i) & 0xfffff000) == 0))
4237 return 1;
4238 /* Fall through. */
4239 case COMPARE:
4240 case EQ:
4241 case NE:
4242 case GT:
4243 case LE:
4244 case LT:
4245 case GE:
4246 case GEU:
4247 case LTU:
4248 case GTU:
4249 case LEU:
4250 case UNORDERED:
4251 case ORDERED:
4252 case UNEQ:
4253 case UNGE:
4254 case UNLT:
4255 case UNGT:
4256 case UNLE:
4257 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4258
4259 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4260 case XOR:
4261 return 0;
4262
4263 case IOR:
4264 if (TARGET_THUMB2)
4265 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4266 return 0;
4267
4268 case AND:
4269 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4270
4271 default:
4272 gcc_unreachable ();
4273 }
4274 }
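/* For illustration: with CODE == PLUS, the constant -1 (0xffffffff) is not
   a valid immediate by itself, but const_ok_for_op accepts it because the
   negated value 1 is, so the addition can be emitted as a subtract.
   Similarly for AND, 0xffffff00 is accepted because its complement 0xff is
   valid and a bic can be used instead.  */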
4275
4276 /* Return true if I is a valid di mode constant for the operation CODE. */
4277 int
4278 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4279 {
4280 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4281 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4282 rtx hi = GEN_INT (hi_val);
4283 rtx lo = GEN_INT (lo_val);
4284
4285 if (TARGET_THUMB1)
4286 return 0;
4287
4288 switch (code)
4289 {
4290 case AND:
4291 case IOR:
4292 case XOR:
4293 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4294 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4295 case PLUS:
4296 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4297
4298 default:
4299 return 0;
4300 }
4301 }
4302
4303 /* Emit a sequence of insns to handle a large constant.
4304 CODE is the code of the operation required, it can be any of SET, PLUS,
4305 IOR, AND, XOR, MINUS;
4306 MODE is the mode in which the operation is being performed;
4307 VAL is the integer to operate on;
4308 SOURCE is the other operand (a register, or a null-pointer for SET);
4309 SUBTARGETS means it is safe to create scratch registers if that will
4310 either produce a simpler sequence, or we will want to cse the values.
4311 Return value is the number of insns emitted. */
4312
4313 /* ??? Tweak this for thumb2. */
4314 int
4315 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4316 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4317 {
4318 rtx cond;
4319
4320 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4321 cond = COND_EXEC_TEST (PATTERN (insn));
4322 else
4323 cond = NULL_RTX;
4324
4325 if (subtargets || code == SET
4326 || (REG_P (target) && REG_P (source)
4327 && REGNO (target) != REGNO (source)))
4328 {
4329 /* After arm_reorg has been called, we can't fix up expensive
4330 constants by pushing them into memory so we must synthesize
4331 them in-line, regardless of the cost. This is only likely to
4332 be more costly on chips that have load delay slots and we are
4333 compiling without running the scheduler (so no splitting
4334 occurred before the final instruction emission).
4335
4336 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4337 */
4338 if (!cfun->machine->after_arm_reorg
4339 && !cond
4340 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4341 1, 0)
4342 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4343 + (code != SET))))
4344 {
4345 if (code == SET)
4346 {
4347 /* Currently SET is the only monadic value for CODE, all
4348 the rest are dyadic. */
4349 if (TARGET_USE_MOVT)
4350 arm_emit_movpair (target, GEN_INT (val));
4351 else
4352 emit_set_insn (target, GEN_INT (val));
4353
4354 return 1;
4355 }
4356 else
4357 {
4358 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4359
4360 if (TARGET_USE_MOVT)
4361 arm_emit_movpair (temp, GEN_INT (val));
4362 else
4363 emit_set_insn (temp, GEN_INT (val));
4364
4365 /* For MINUS, the value is the minuend (VAL - source), since we never
4366 have subtraction of a constant. */
4367 if (code == MINUS)
4368 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4369 else
4370 emit_set_insn (target,
4371 gen_rtx_fmt_ee (code, mode, source, temp));
4372 return 2;
4373 }
4374 }
4375 }
4376
4377 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4378 1);
4379 }
4380
4381 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4382 ARM/THUMB2 immediates and add up to VAL.
4383 The function return value gives the number of insns required. */
4384 static int
4385 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4386 struct four_ints *return_sequence)
4387 {
4388 int best_consecutive_zeros = 0;
4389 int i;
4390 int best_start = 0;
4391 int insns1, insns2;
4392 struct four_ints tmp_sequence;
4393
4394 /* If we aren't targeting ARM, the best place to start is always at
4395 the bottom, otherwise look more closely. */
4396 if (TARGET_ARM)
4397 {
4398 for (i = 0; i < 32; i += 2)
4399 {
4400 int consecutive_zeros = 0;
4401
4402 if (!(val & (3 << i)))
4403 {
4404 while ((i < 32) && !(val & (3 << i)))
4405 {
4406 consecutive_zeros += 2;
4407 i += 2;
4408 }
4409 if (consecutive_zeros > best_consecutive_zeros)
4410 {
4411 best_consecutive_zeros = consecutive_zeros;
4412 best_start = i - consecutive_zeros;
4413 }
4414 i -= 2;
4415 }
4416 }
4417 }
4418
4419 /* So long as it won't require any more insns to do so, it's
4420 desirable to emit a small constant (in bits 0...9) in the last
4421 insn. This way there is more chance that it can be combined with
4422 a later addressing insn to form a pre-indexed load or store
4423 operation. Consider:
4424
4425 *((volatile int *)0xe0000100) = 1;
4426 *((volatile int *)0xe0000110) = 2;
4427
4428 We want this to wind up as:
4429
4430 mov rA, #0xe0000000
4431 mov rB, #1
4432 str rB, [rA, #0x100]
4433 mov rB, #2
4434 str rB, [rA, #0x110]
4435
4436 rather than having to synthesize both large constants from scratch.
4437
4438 Therefore, we calculate how many insns would be required to emit
4439 the constant starting from `best_start', and also starting from
4440 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4441 yield a shorter sequence, we may as well use zero. */
4442 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4443 if (best_start != 0
4444 && ((HOST_WIDE_INT_1U << best_start) < val))
4445 {
4446 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4447 if (insns2 <= insns1)
4448 {
4449 *return_sequence = tmp_sequence;
4450 insns1 = insns2;
4451 }
4452 }
4453
4454 return insns1;
4455 }
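/* For illustration: a SET of 0x00ffff00 is not a single valid ARM-mode
   immediate, but the routine above can return the two-element sequence
   { 0x00ff0000, 0x0000ff00 } (both valid rotated 8-bit immediates), which
   the caller then materializes with two instructions, e.g. a mov of the
   first piece followed by an add/orr of the second.  */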
4456
4457 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4458 static int
4459 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4460 struct four_ints *return_sequence, int i)
4461 {
4462 int remainder = val & 0xffffffff;
4463 int insns = 0;
4464
4465 /* Try and find a way of doing the job in either two or three
4466 instructions.
4467
4468 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4469 location. We start at position I. This may be the MSB, or
4470 optimal_immediate_sequence may have positioned it at the largest block
4471 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4472 wrapping around to the top of the word when we drop off the bottom.
4473 In the worst case this code should produce no more than four insns.
4474
4475 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4476 constants, shifted to any arbitrary location. We should always start
4477 at the MSB. */
4478 do
4479 {
4480 int end;
4481 unsigned int b1, b2, b3, b4;
4482 unsigned HOST_WIDE_INT result;
4483 int loc;
4484
4485 gcc_assert (insns < 4);
4486
4487 if (i <= 0)
4488 i += 32;
4489
4490 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4491 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4492 {
4493 loc = i;
4494 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4495 /* We can use addw/subw for the last 12 bits. */
4496 result = remainder;
4497 else
4498 {
4499 /* Use an 8-bit shifted/rotated immediate. */
4500 end = i - 8;
4501 if (end < 0)
4502 end += 32;
4503 result = remainder & ((0x0ff << end)
4504 | ((i < end) ? (0xff >> (32 - end))
4505 : 0));
4506 i -= 8;
4507 }
4508 }
4509 else
4510 {
4511 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4512 arbitrary shifts. */
4513 i -= TARGET_ARM ? 2 : 1;
4514 continue;
4515 }
4516
4517 /* Next, see if we can do a better job with a thumb2 replicated
4518 constant.
4519
4520 We do it this way around to catch the cases like 0x01F001E0 where
4521 two 8-bit immediates would work, but a replicated constant would
4522 make it worse.
4523
4524 TODO: 16-bit constants that don't clear all the bits, but still win.
4525 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4526 if (TARGET_THUMB2)
4527 {
4528 b1 = (remainder & 0xff000000) >> 24;
4529 b2 = (remainder & 0x00ff0000) >> 16;
4530 b3 = (remainder & 0x0000ff00) >> 8;
4531 b4 = remainder & 0xff;
4532
4533 if (loc > 24)
4534 {
4535 /* The 8-bit immediate already found clears b1 (and maybe b2),
4536 but must leave b3 and b4 alone. */
4537
4538 /* First try to find a 32-bit replicated constant that clears
4539 almost everything. We can assume that we can't do it in one,
4540 or else we wouldn't be here. */
4541 unsigned int tmp = b1 & b2 & b3 & b4;
4542 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4543 + (tmp << 24);
4544 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4545 + (tmp == b3) + (tmp == b4);
4546 if (tmp
4547 && (matching_bytes >= 3
4548 || (matching_bytes == 2
4549 && const_ok_for_op (remainder & ~tmp2, code))))
4550 {
4551 /* At least 3 of the bytes match, and the fourth has at
4552 least as many bits set, or two of the bytes match
4553 and it will only require one more insn to finish. */
4554 result = tmp2;
4555 i = tmp != b1 ? 32
4556 : tmp != b2 ? 24
4557 : tmp != b3 ? 16
4558 : 8;
4559 }
4560
4561 /* Second, try to find a 16-bit replicated constant that can
4562 leave three of the bytes clear. If b2 or b4 is already
4563 zero, then we can. If the 8-bit from above would not
4564 clear b2 anyway, then we still win. */
4565 else if (b1 == b3 && (!b2 || !b4
4566 || (remainder & 0x00ff0000 & ~result)))
4567 {
4568 result = remainder & 0xff00ff00;
4569 i = 24;
4570 }
4571 }
4572 else if (loc > 16)
4573 {
4574 /* The 8-bit immediate already found clears b2 (and maybe b3)
4575 and we don't get here unless b1 is already clear, but it will
4576 leave b4 unchanged. */
4577
4578 /* If we can clear b2 and b4 at once, then we win, since the
4579 8-bits couldn't possibly reach that far. */
4580 if (b2 == b4)
4581 {
4582 result = remainder & 0x00ff00ff;
4583 i = 16;
4584 }
4585 }
4586 }
4587
4588 return_sequence->i[insns++] = result;
4589 remainder &= ~result;
4590
4591 if (code == SET || code == MINUS)
4592 code = PLUS;
4593 }
4594 while (remainder);
4595
4596 return insns;
4597 }
4598
4599 /* Emit an instruction with the indicated PATTERN. If COND is
4600 non-NULL, conditionalize the execution of the instruction on COND
4601 being true. */
4602
4603 static void
4604 emit_constant_insn (rtx cond, rtx pattern)
4605 {
4606 if (cond)
4607 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4608 emit_insn (pattern);
4609 }
4610
4611 /* As above, but extra parameter GENERATE which, if clear, suppresses
4612 RTL generation. */
4613
4614 static int
4615 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4616 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4617 int subtargets, int generate)
4618 {
4619 int can_invert = 0;
4620 int can_negate = 0;
4621 int final_invert = 0;
4622 int i;
4623 int set_sign_bit_copies = 0;
4624 int clear_sign_bit_copies = 0;
4625 int clear_zero_bit_copies = 0;
4626 int set_zero_bit_copies = 0;
4627 int insns = 0, neg_insns, inv_insns;
4628 unsigned HOST_WIDE_INT temp1, temp2;
4629 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4630 struct four_ints *immediates;
4631 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4632
4633 /* Find out which operations are safe for a given CODE. Also do a quick
4634 check for degenerate cases; these can occur when DImode operations
4635 are split. */
4636 switch (code)
4637 {
4638 case SET:
4639 can_invert = 1;
4640 break;
4641
4642 case PLUS:
4643 can_negate = 1;
4644 break;
4645
4646 case IOR:
4647 if (remainder == 0xffffffff)
4648 {
4649 if (generate)
4650 emit_constant_insn (cond,
4651 gen_rtx_SET (target,
4652 GEN_INT (ARM_SIGN_EXTEND (val))));
4653 return 1;
4654 }
4655
4656 if (remainder == 0)
4657 {
4658 if (reload_completed && rtx_equal_p (target, source))
4659 return 0;
4660
4661 if (generate)
4662 emit_constant_insn (cond, gen_rtx_SET (target, source));
4663 return 1;
4664 }
4665 break;
4666
4667 case AND:
4668 if (remainder == 0)
4669 {
4670 if (generate)
4671 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4672 return 1;
4673 }
4674 if (remainder == 0xffffffff)
4675 {
4676 if (reload_completed && rtx_equal_p (target, source))
4677 return 0;
4678 if (generate)
4679 emit_constant_insn (cond, gen_rtx_SET (target, source));
4680 return 1;
4681 }
4682 can_invert = 1;
4683 break;
4684
4685 case XOR:
4686 if (remainder == 0)
4687 {
4688 if (reload_completed && rtx_equal_p (target, source))
4689 return 0;
4690 if (generate)
4691 emit_constant_insn (cond, gen_rtx_SET (target, source));
4692 return 1;
4693 }
4694
4695 if (remainder == 0xffffffff)
4696 {
4697 if (generate)
4698 emit_constant_insn (cond,
4699 gen_rtx_SET (target,
4700 gen_rtx_NOT (mode, source)));
4701 return 1;
4702 }
4703 final_invert = 1;
4704 break;
4705
4706 case MINUS:
4707 /* We treat MINUS as (val - source), since (source - val) is always
4708 passed as (source + (-val)). */
4709 if (remainder == 0)
4710 {
4711 if (generate)
4712 emit_constant_insn (cond,
4713 gen_rtx_SET (target,
4714 gen_rtx_NEG (mode, source)));
4715 return 1;
4716 }
4717 if (const_ok_for_arm (val))
4718 {
4719 if (generate)
4720 emit_constant_insn (cond,
4721 gen_rtx_SET (target,
4722 gen_rtx_MINUS (mode, GEN_INT (val),
4723 source)));
4724 return 1;
4725 }
4726
4727 break;
4728
4729 default:
4730 gcc_unreachable ();
4731 }
4732
4733 /* If we can do it in one insn get out quickly. */
4734 if (const_ok_for_op (val, code))
4735 {
4736 if (generate)
4737 emit_constant_insn (cond,
4738 gen_rtx_SET (target,
4739 (source
4740 ? gen_rtx_fmt_ee (code, mode, source,
4741 GEN_INT (val))
4742 : GEN_INT (val))));
4743 return 1;
4744 }
4745
4746 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4747 insn. */
4748 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4749 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4750 {
4751 if (generate)
4752 {
4753 if (mode == SImode && i == 16)
4754 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4755 smaller insn. */
4756 emit_constant_insn (cond,
4757 gen_zero_extendhisi2
4758 (target, gen_lowpart (HImode, source)));
4759 else
4760 /* Extz only supports SImode, but we can coerce the operands
4761 into that mode. */
4762 emit_constant_insn (cond,
4763 gen_extzv_t2 (gen_lowpart (SImode, target),
4764 gen_lowpart (SImode, source),
4765 GEN_INT (i), const0_rtx));
4766 }
4767
4768 return 1;
4769 }
4770
4771 /* Calculate a few attributes that may be useful for specific
4772 optimizations. */
4773 /* Count number of leading zeros. */
4774 for (i = 31; i >= 0; i--)
4775 {
4776 if ((remainder & (1 << i)) == 0)
4777 clear_sign_bit_copies++;
4778 else
4779 break;
4780 }
4781
4782 /* Count number of leading 1's. */
4783 for (i = 31; i >= 0; i--)
4784 {
4785 if ((remainder & (1 << i)) != 0)
4786 set_sign_bit_copies++;
4787 else
4788 break;
4789 }
4790
4791 /* Count number of trailing zeros. */
4792 for (i = 0; i <= 31; i++)
4793 {
4794 if ((remainder & (1 << i)) == 0)
4795 clear_zero_bit_copies++;
4796 else
4797 break;
4798 }
4799
4800 /* Count number of trailing 1's. */
4801 for (i = 0; i <= 31; i++)
4802 {
4803 if ((remainder & (1 << i)) != 0)
4804 set_zero_bit_copies++;
4805 else
4806 break;
4807 }
4808
4809 switch (code)
4810 {
4811 case SET:
4812 /* See if we can do this by sign_extending a constant that is known
4813 to be negative. This is a good way of doing it, since the shift
4814 may well merge into a subsequent insn. */
4815 if (set_sign_bit_copies > 1)
4816 {
4817 if (const_ok_for_arm
4818 (temp1 = ARM_SIGN_EXTEND (remainder
4819 << (set_sign_bit_copies - 1))))
4820 {
4821 if (generate)
4822 {
4823 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4824 emit_constant_insn (cond,
4825 gen_rtx_SET (new_src, GEN_INT (temp1)));
4826 emit_constant_insn (cond,
4827 gen_ashrsi3 (target, new_src,
4828 GEN_INT (set_sign_bit_copies - 1)));
4829 }
4830 return 2;
4831 }
4832 /* For an inverted constant, we will need to set the low bits,
4833 these will be shifted out of harm's way. */
4834 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4835 if (const_ok_for_arm (~temp1))
4836 {
4837 if (generate)
4838 {
4839 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (new_src, GEN_INT (temp1)));
4842 emit_constant_insn (cond,
4843 gen_ashrsi3 (target, new_src,
4844 GEN_INT (set_sign_bit_copies - 1)));
4845 }
4846 return 2;
4847 }
4848 }
4849
4850 /* See if we can calculate the value as the difference between two
4851 valid immediates. */
4852 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4853 {
4854 int topshift = clear_sign_bit_copies & ~1;
4855
4856 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4857 & (0xff000000 >> topshift));
4858
4859 /* If temp1 is zero, then that means the 9 most significant
4860 bits of remainder were 1 and we've caused it to overflow.
4861 When topshift is 0 we don't need to do anything since we
4862 can borrow from 'bit 32'. */
4863 if (temp1 == 0 && topshift != 0)
4864 temp1 = 0x80000000 >> (topshift - 1);
4865
4866 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4867
4868 if (const_ok_for_arm (temp2))
4869 {
4870 if (generate)
4871 {
4872 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4873 emit_constant_insn (cond,
4874 gen_rtx_SET (new_src, GEN_INT (temp1)));
4875 emit_constant_insn (cond,
4876 gen_addsi3 (target, new_src,
4877 GEN_INT (-temp2)));
4878 }
4879
4880 return 2;
4881 }
4882 }
4883
4884 /* See if we can generate this by setting the bottom (or the top)
4885 16 bits, and then shifting these into the other half of the
4886 word. We only look for the simplest cases, to do more would cost
4887 too much. Be careful, however, not to generate this when the
4888 alternative would take fewer insns. */
4889 if (val & 0xffff0000)
4890 {
4891 temp1 = remainder & 0xffff0000;
4892 temp2 = remainder & 0x0000ffff;
4893
4894 /* Overlaps outside this range are best done using other methods. */
4895 for (i = 9; i < 24; i++)
4896 {
4897 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4898 && !const_ok_for_arm (temp2))
4899 {
4900 rtx new_src = (subtargets
4901 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4902 : target);
4903 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4904 source, subtargets, generate);
4905 source = new_src;
4906 if (generate)
4907 emit_constant_insn
4908 (cond,
4909 gen_rtx_SET
4910 (target,
4911 gen_rtx_IOR (mode,
4912 gen_rtx_ASHIFT (mode, source,
4913 GEN_INT (i)),
4914 source)));
4915 return insns + 1;
4916 }
4917 }
4918
4919 /* Don't duplicate cases already considered. */
4920 for (i = 17; i < 24; i++)
4921 {
4922 if (((temp1 | (temp1 >> i)) == remainder)
4923 && !const_ok_for_arm (temp1))
4924 {
4925 rtx new_src = (subtargets
4926 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4927 : target);
4928 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4929 source, subtargets, generate);
4930 source = new_src;
4931 if (generate)
4932 emit_constant_insn
4933 (cond,
4934 gen_rtx_SET (target,
4935 gen_rtx_IOR
4936 (mode,
4937 gen_rtx_LSHIFTRT (mode, source,
4938 GEN_INT (i)),
4939 source)));
4940 return insns + 1;
4941 }
4942 }
4943 }
4944 break;
4945
4946 case IOR:
4947 case XOR:
4948 /* If we have IOR or XOR, and the constant can be loaded in a
4949 single instruction, and we can find a temporary to put it in,
4950 then this can be done in two instructions instead of 3-4. */
4951 if (subtargets
4952 /* TARGET can't be NULL if SUBTARGETS is 0 */
4953 || (reload_completed && !reg_mentioned_p (target, source)))
4954 {
4955 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4956 {
4957 if (generate)
4958 {
4959 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4960
4961 emit_constant_insn (cond,
4962 gen_rtx_SET (sub, GEN_INT (val)));
4963 emit_constant_insn (cond,
4964 gen_rtx_SET (target,
4965 gen_rtx_fmt_ee (code, mode,
4966 source, sub)));
4967 }
4968 return 2;
4969 }
4970 }
4971
4972 if (code == XOR)
4973 break;
4974
4975 	  /* Convert
4976 	     x = y | constant (which is composed of set_sign_bit_copies leading 1s
4977 	     followed by 0s, e.g. 0xfff00000) into
4978 	     x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4979 
4980 	     This can be done in 2 instructions by using shifts with mov or mvn.
4981 	     e.g. for
4982 	     x = x | 0xfff00000;
4983 	     we generate
4984 	     mvn	r0, r0, asl #12
4985 	     mvn	r0, r0, lsr #12  */
4986 if (set_sign_bit_copies > 8
4987 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4988 {
4989 if (generate)
4990 {
4991 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4992 rtx shift = GEN_INT (set_sign_bit_copies);
4993
4994 emit_constant_insn
4995 (cond,
4996 gen_rtx_SET (sub,
4997 gen_rtx_NOT (mode,
4998 gen_rtx_ASHIFT (mode,
4999 source,
5000 shift))));
5001 emit_constant_insn
5002 (cond,
5003 gen_rtx_SET (target,
5004 gen_rtx_NOT (mode,
5005 gen_rtx_LSHIFTRT (mode, sub,
5006 shift))));
5007 }
5008 return 2;
5009 }
5010
5011 	  /* Convert
5012 	     x = y | constant (which has set_zero_bit_copies trailing ones)
5013 	     to
5014 	     x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5015 
5016 	     E.g. for r0 = r0 | 0xfff we generate
5017 	       mvn	r0, r0, lsr #12
5018 	       mvn	r0, r0, asl #12
5019 
5020 	  */
5021 if (set_zero_bit_copies > 8
5022 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5023 {
5024 if (generate)
5025 {
5026 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5027 rtx shift = GEN_INT (set_zero_bit_copies);
5028
5029 emit_constant_insn
5030 (cond,
5031 gen_rtx_SET (sub,
5032 gen_rtx_NOT (mode,
5033 gen_rtx_LSHIFTRT (mode,
5034 source,
5035 shift))));
5036 emit_constant_insn
5037 (cond,
5038 gen_rtx_SET (target,
5039 gen_rtx_NOT (mode,
5040 gen_rtx_ASHIFT (mode, sub,
5041 shift))));
5042 }
5043 return 2;
5044 }
5045
5046 /* This will never be reached for Thumb2 because orn is a valid
5047 instruction. This is for Thumb1 and the ARM 32 bit cases.
5048
5049 x = y | constant (such that ~constant is a valid constant)
5050 Transform this to
5051 x = ~(~y & ~constant).
5052 */
5053 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5054 {
5055 if (generate)
5056 {
5057 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5058 emit_constant_insn (cond,
5059 gen_rtx_SET (sub,
5060 gen_rtx_NOT (mode, source)));
5061 source = sub;
5062 if (subtargets)
5063 sub = gen_reg_rtx (mode);
5064 emit_constant_insn (cond,
5065 gen_rtx_SET (sub,
5066 gen_rtx_AND (mode, source,
5067 GEN_INT (temp1))));
5068 emit_constant_insn (cond,
5069 gen_rtx_SET (target,
5070 gen_rtx_NOT (mode, sub)));
5071 }
5072 return 3;
5073 }
5074 break;
5075
5076 case AND:
5077       /* See if two shifts will do the work of two or more insns.  */
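      /* Illustrative sketch (not from the original source): for
	 x &= 0xffff we have clear_sign_bit_copies == 16, so (assuming no
	 cheaper single-instruction form such as uxth or a movw-based
	 sequence was usable earlier) we can emit
	     lsl	rT, rS, #16
	     lsr	rD, rT, #16
	 which clears the top 16 bits in two instructions.  */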
5078 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5079 {
5080 HOST_WIDE_INT shift_mask = ((0xffffffff
5081 << (32 - clear_sign_bit_copies))
5082 & 0xffffffff);
5083
5084 if ((remainder | shift_mask) != 0xffffffff)
5085 {
5086 HOST_WIDE_INT new_val
5087 = ARM_SIGN_EXTEND (remainder | shift_mask);
5088
5089 if (generate)
5090 {
5091 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5092 insns = arm_gen_constant (AND, SImode, cond, new_val,
5093 new_src, source, subtargets, 1);
5094 source = new_src;
5095 }
5096 else
5097 {
5098 rtx targ = subtargets ? NULL_RTX : target;
5099 insns = arm_gen_constant (AND, mode, cond, new_val,
5100 targ, source, subtargets, 0);
5101 }
5102 }
5103
5104 if (generate)
5105 {
5106 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5107 rtx shift = GEN_INT (clear_sign_bit_copies);
5108
5109 emit_insn (gen_ashlsi3 (new_src, source, shift));
5110 emit_insn (gen_lshrsi3 (target, new_src, shift));
5111 }
5112
5113 return insns + 2;
5114 }
5115
5116 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5117 {
5118 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5119
5120 if ((remainder | shift_mask) != 0xffffffff)
5121 {
5122 HOST_WIDE_INT new_val
5123 = ARM_SIGN_EXTEND (remainder | shift_mask);
5124 if (generate)
5125 {
5126 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5127
5128 insns = arm_gen_constant (AND, mode, cond, new_val,
5129 new_src, source, subtargets, 1);
5130 source = new_src;
5131 }
5132 else
5133 {
5134 rtx targ = subtargets ? NULL_RTX : target;
5135
5136 insns = arm_gen_constant (AND, mode, cond, new_val,
5137 targ, source, subtargets, 0);
5138 }
5139 }
5140
5141 if (generate)
5142 {
5143 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5144 rtx shift = GEN_INT (clear_zero_bit_copies);
5145
5146 emit_insn (gen_lshrsi3 (new_src, source, shift));
5147 emit_insn (gen_ashlsi3 (target, new_src, shift));
5148 }
5149
5150 return insns + 2;
5151 }
5152
5153 break;
5154
5155 default:
5156 break;
5157 }
5158
5159 /* Calculate what the instruction sequences would be if we generated it
5160 normally, negated, or inverted. */
5161 if (code == AND)
5162 /* AND cannot be split into multiple insns, so invert and use BIC. */
5163 insns = 99;
5164 else
5165 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5166
5167 if (can_negate)
5168 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5169 &neg_immediates);
5170 else
5171 neg_insns = 99;
5172
5173 if (can_invert || final_invert)
5174 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5175 &inv_immediates);
5176 else
5177 inv_insns = 99;
5178
5179 immediates = &pos_immediates;
5180
5181 /* Is the negated immediate sequence more efficient? */
5182 if (neg_insns < insns && neg_insns <= inv_insns)
5183 {
5184 insns = neg_insns;
5185 immediates = &neg_immediates;
5186 }
5187 else
5188 can_negate = 0;
5189
5190 /* Is the inverted immediate sequence more efficient?
5191 We must allow for an extra NOT instruction for XOR operations, although
5192 there is some chance that the final 'mvn' will get optimized later. */
5193 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5194 {
5195 insns = inv_insns;
5196 immediates = &inv_immediates;
5197 }
5198 else
5199 {
5200 can_invert = 0;
5201 final_invert = 0;
5202 }
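  /* Illustrative sketch (not from the original source): for a PLUS of
     0xffffff00 (-256), the constant itself is not a valid ARM immediate
     but its negation 0x100 is, so (if an earlier shortcut has not
     already handled it) the negated sequence wins above and the loop
     below emits a single (plus source -256), i.e.
	 sub	rD, rS, #256
     rather than synthesizing 0xffffff00 piecewise.  */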
5203
5204 /* Now output the chosen sequence as instructions. */
5205 if (generate)
5206 {
5207 for (i = 0; i < insns; i++)
5208 {
5209 rtx new_src, temp1_rtx;
5210
5211 temp1 = immediates->i[i];
5212
5213 if (code == SET || code == MINUS)
5214 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5215 else if ((final_invert || i < (insns - 1)) && subtargets)
5216 new_src = gen_reg_rtx (mode);
5217 else
5218 new_src = target;
5219
5220 if (can_invert)
5221 temp1 = ~temp1;
5222 else if (can_negate)
5223 temp1 = -temp1;
5224
5225 temp1 = trunc_int_for_mode (temp1, mode);
5226 temp1_rtx = GEN_INT (temp1);
5227
5228 if (code == SET)
5229 ;
5230 else if (code == MINUS)
5231 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5232 else
5233 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5234
5235 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5236 source = new_src;
5237
5238 if (code == SET)
5239 {
5240 can_negate = can_invert;
5241 can_invert = 0;
5242 code = PLUS;
5243 }
5244 else if (code == MINUS)
5245 code = PLUS;
5246 }
5247 }
5248
5249 if (final_invert)
5250 {
5251 if (generate)
5252 emit_constant_insn (cond, gen_rtx_SET (target,
5253 gen_rtx_NOT (mode, source)));
5254 insns++;
5255 }
5256
5257 return insns;
5258 }
5259
5260 /* Canonicalize a comparison so that we are more likely to recognize it.
5261 This can be done for a few constant compares, where we can make the
5262 immediate value easier to load. */
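/* Illustrative sketch (not from the original source): neither 0x1fff nor
   -0x1fff is a valid ARM immediate, but 0x2000 is, so a comparison such
   as (GT x 0x1fff) is canonicalized below to (GE x 0x2000), which can
   then be implemented with a single cmp against #0x2000.  */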
5263
5264 static void
5265 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5266 bool op0_preserve_value)
5267 {
5268 machine_mode mode;
5269 unsigned HOST_WIDE_INT i, maxval;
5270
5271 mode = GET_MODE (*op0);
5272 if (mode == VOIDmode)
5273 mode = GET_MODE (*op1);
5274
5275 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5276
5277 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5278 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5279 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5280 for GTU/LEU in Thumb mode. */
5281 if (mode == DImode)
5282 {
5283
5284 if (*code == GT || *code == LE
5285 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5286 {
5287 /* Missing comparison. First try to use an available
5288 comparison. */
5289 if (CONST_INT_P (*op1))
5290 {
5291 i = INTVAL (*op1);
5292 switch (*code)
5293 {
5294 case GT:
5295 case LE:
5296 if (i != maxval
5297 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5298 {
5299 *op1 = GEN_INT (i + 1);
5300 *code = *code == GT ? GE : LT;
5301 return;
5302 }
5303 break;
5304 case GTU:
5305 case LEU:
5306 if (i != ~((unsigned HOST_WIDE_INT) 0)
5307 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5308 {
5309 *op1 = GEN_INT (i + 1);
5310 *code = *code == GTU ? GEU : LTU;
5311 return;
5312 }
5313 break;
5314 default:
5315 gcc_unreachable ();
5316 }
5317 }
5318
5319 /* If that did not work, reverse the condition. */
5320 if (!op0_preserve_value)
5321 {
5322 std::swap (*op0, *op1);
5323 *code = (int)swap_condition ((enum rtx_code)*code);
5324 }
5325 }
5326 return;
5327 }
5328
5329 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5330 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5331 to facilitate possible combining with a cmp into 'ands'. */
5332 if (mode == SImode
5333 && GET_CODE (*op0) == ZERO_EXTEND
5334 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5335 && GET_MODE (XEXP (*op0, 0)) == QImode
5336 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5337 && subreg_lowpart_p (XEXP (*op0, 0))
5338 && *op1 == const0_rtx)
5339 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5340 GEN_INT (255));
5341
5342 /* Comparisons smaller than DImode. Only adjust comparisons against
5343 an out-of-range constant. */
5344 if (!CONST_INT_P (*op1)
5345 || const_ok_for_arm (INTVAL (*op1))
5346 || const_ok_for_arm (- INTVAL (*op1)))
5347 return;
5348
5349 i = INTVAL (*op1);
5350
5351 switch (*code)
5352 {
5353 case EQ:
5354 case NE:
5355 return;
5356
5357 case GT:
5358 case LE:
5359 if (i != maxval
5360 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5361 {
5362 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5363 *code = *code == GT ? GE : LT;
5364 return;
5365 }
5366 break;
5367
5368 case GE:
5369 case LT:
5370 if (i != ~maxval
5371 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5372 {
5373 *op1 = GEN_INT (i - 1);
5374 *code = *code == GE ? GT : LE;
5375 return;
5376 }
5377 break;
5378
5379 case GTU:
5380 case LEU:
5381 if (i != ~((unsigned HOST_WIDE_INT) 0)
5382 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5383 {
5384 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5385 *code = *code == GTU ? GEU : LTU;
5386 return;
5387 }
5388 break;
5389
5390 case GEU:
5391 case LTU:
5392 if (i != 0
5393 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5394 {
5395 *op1 = GEN_INT (i - 1);
5396 *code = *code == GEU ? GTU : LEU;
5397 return;
5398 }
5399 break;
5400
5401 default:
5402 gcc_unreachable ();
5403 }
5404 }
5405
5406
5407 /* Define how to find the value returned by a function. */
5408
5409 static rtx
5410 arm_function_value (const_tree type, const_tree func,
5411 bool outgoing ATTRIBUTE_UNUSED)
5412 {
5413 machine_mode mode;
5414 int unsignedp ATTRIBUTE_UNUSED;
5415 rtx r ATTRIBUTE_UNUSED;
5416
5417 mode = TYPE_MODE (type);
5418
5419 if (TARGET_AAPCS_BASED)
5420 return aapcs_allocate_return_reg (mode, type, func);
5421
5422 /* Promote integer types. */
5423 if (INTEGRAL_TYPE_P (type))
5424 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5425
5426 /* Promotes small structs returned in a register to full-word size
5427 for big-endian AAPCS. */
5428 if (arm_return_in_msb (type))
5429 {
5430 HOST_WIDE_INT size = int_size_in_bytes (type);
5431 if (size % UNITS_PER_WORD != 0)
5432 {
5433 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5434 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5435 }
5436 }
5437
5438 return arm_libcall_value_1 (mode);
5439 }
5440
5441 /* libcall hashtable helpers. */
5442
5443 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5444 {
5445 static inline hashval_t hash (const rtx_def *);
5446 static inline bool equal (const rtx_def *, const rtx_def *);
5447 static inline void remove (rtx_def *);
5448 };
5449
5450 inline bool
5451 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5452 {
5453 return rtx_equal_p (p1, p2);
5454 }
5455
5456 inline hashval_t
5457 libcall_hasher::hash (const rtx_def *p1)
5458 {
5459 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5460 }
5461
5462 typedef hash_table<libcall_hasher> libcall_table_type;
5463
5464 static void
5465 add_libcall (libcall_table_type *htab, rtx libcall)
5466 {
5467 *htab->find_slot (libcall, INSERT) = libcall;
5468 }
5469
5470 static bool
5471 arm_libcall_uses_aapcs_base (const_rtx libcall)
5472 {
5473 static bool init_done = false;
5474 static libcall_table_type *libcall_htab = NULL;
5475
5476 if (!init_done)
5477 {
5478 init_done = true;
5479
5480 libcall_htab = new libcall_table_type (31);
5481 add_libcall (libcall_htab,
5482 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5483 add_libcall (libcall_htab,
5484 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5485 add_libcall (libcall_htab,
5486 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5487 add_libcall (libcall_htab,
5488 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5489
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5492 add_libcall (libcall_htab,
5493 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5494 add_libcall (libcall_htab,
5495 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5496 add_libcall (libcall_htab,
5497 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5498
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5505 add_libcall (libcall_htab,
5506 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5507 add_libcall (libcall_htab,
5508 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5509 add_libcall (libcall_htab,
5510 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5511 add_libcall (libcall_htab,
5512 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5513 add_libcall (libcall_htab,
5514 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5515
5516 /* Values from double-precision helper functions are returned in core
5517 registers if the selected core only supports single-precision
5518 arithmetic, even if we are using the hard-float ABI. The same is
5519 true for single-precision helpers, but we will never be using the
5520 hard-float ABI on a CPU which doesn't support single-precision
5521 operations in hardware. */
5522 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5523 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5524 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5525 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5526 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5527 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5533 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5534 SFmode));
5535 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5536 DFmode));
5537 add_libcall (libcall_htab,
5538 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5539 }
5540
5541 return libcall && libcall_htab->find (libcall) != NULL;
5542 }
5543
5544 static rtx
5545 arm_libcall_value_1 (machine_mode mode)
5546 {
5547 if (TARGET_AAPCS_BASED)
5548 return aapcs_libcall_value (mode);
5549 else if (TARGET_IWMMXT_ABI
5550 && arm_vector_mode_supported_p (mode))
5551 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5552 else
5553 return gen_rtx_REG (mode, ARG_REGISTER (1));
5554 }
5555
5556 /* Define how to find the value returned by a library function
5557 assuming the value has mode MODE. */
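/* Illustrative sketch (not from the original source): on a core with a
   single-precision-only FPU used with the hard-float ABI, DFmode
   arithmetic goes through the __aeabi_* helpers recorded above, and
   their double results come back in r0/r1 rather than in a VFP
   register; arm_libcall_uses_aapcs_base is what detects that case.  */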
5558
5559 static rtx
5560 arm_libcall_value (machine_mode mode, const_rtx libcall)
5561 {
5562 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5563 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5564 {
5565 /* The following libcalls return their result in integer registers,
5566 even though they return a floating point value. */
5567 if (arm_libcall_uses_aapcs_base (libcall))
5568 return gen_rtx_REG (mode, ARG_REGISTER(1));
5569
5570 }
5571
5572 return arm_libcall_value_1 (mode);
5573 }
5574
5575 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5576
5577 static bool
5578 arm_function_value_regno_p (const unsigned int regno)
5579 {
5580 if (regno == ARG_REGISTER (1)
5581 || (TARGET_32BIT
5582 && TARGET_AAPCS_BASED
5583 && TARGET_HARD_FLOAT
5584 && regno == FIRST_VFP_REGNUM)
5585 || (TARGET_IWMMXT_ABI
5586 && regno == FIRST_IWMMXT_REGNUM))
5587 return true;
5588
5589 return false;
5590 }
5591
5592 /* Determine the amount of memory needed to store the possible return
5593 registers of an untyped call. */
5594 int
5595 arm_apply_result_size (void)
5596 {
5597 int size = 16;
5598
5599 if (TARGET_32BIT)
5600 {
5601 if (TARGET_HARD_FLOAT_ABI)
5602 size += 32;
5603 if (TARGET_IWMMXT_ABI)
5604 size += 8;
5605 }
5606
5607 return size;
5608 }
5609
5610 /* Decide whether TYPE should be returned in memory (true)
5611 or in a register (false). FNTYPE is the type of the function making
5612 the call. */
5613 static bool
5614 arm_return_in_memory (const_tree type, const_tree fntype)
5615 {
5616 HOST_WIDE_INT size;
5617
5618 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5619
5620 if (TARGET_AAPCS_BASED)
5621 {
5622       /* Simple, non-aggregate types (i.e. not including vectors and
5623 complex) are always returned in a register (or registers).
5624 We don't care about which register here, so we can short-cut
5625 some of the detail. */
5626 if (!AGGREGATE_TYPE_P (type)
5627 && TREE_CODE (type) != VECTOR_TYPE
5628 && TREE_CODE (type) != COMPLEX_TYPE)
5629 return false;
5630
5631 /* Any return value that is no larger than one word can be
5632 returned in r0. */
5633 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5634 return false;
5635
5636 /* Check any available co-processors to see if they accept the
5637 type as a register candidate (VFP, for example, can return
5638 some aggregates in consecutive registers). These aren't
5639 available if the call is variadic. */
5640 if (aapcs_select_return_coproc (type, fntype) >= 0)
5641 return false;
5642
5643 /* Vector values should be returned using ARM registers, not
5644 memory (unless they're over 16 bytes, which will break since
5645 we only have four call-clobbered registers to play with). */
5646 if (TREE_CODE (type) == VECTOR_TYPE)
5647 return (size < 0 || size > (4 * UNITS_PER_WORD));
5648
5649 /* The rest go in memory. */
5650 return true;
5651 }
5652
5653 if (TREE_CODE (type) == VECTOR_TYPE)
5654 return (size < 0 || size > (4 * UNITS_PER_WORD));
5655
5656   if (!AGGREGATE_TYPE_P (type)
5657       && (TREE_CODE (type) != VECTOR_TYPE))
5658 /* All simple types are returned in registers. */
5659 return false;
5660
5661 if (arm_abi != ARM_ABI_APCS)
5662 {
5663 /* ATPCS and later return aggregate types in memory only if they are
5664 larger than a word (or are variable size). */
5665 return (size < 0 || size > UNITS_PER_WORD);
5666 }
5667
5668 /* For the arm-wince targets we choose to be compatible with Microsoft's
5669 ARM and Thumb compilers, which always return aggregates in memory. */
5670 #ifndef ARM_WINCE
5671 /* All structures/unions bigger than one word are returned in memory.
5672 Also catch the case where int_size_in_bytes returns -1. In this case
5673 the aggregate is either huge or of variable size, and in either case
5674 we will want to return it via memory and not in a register. */
5675 if (size < 0 || size > UNITS_PER_WORD)
5676 return true;
5677
5678 if (TREE_CODE (type) == RECORD_TYPE)
5679 {
5680 tree field;
5681
5682 /* For a struct the APCS says that we only return in a register
5683 if the type is 'integer like' and every addressable element
5684 has an offset of zero. For practical purposes this means
5685 that the structure can have at most one non bit-field element
5686 and that this element must be the first one in the structure. */
5687
5688 /* Find the first field, ignoring non FIELD_DECL things which will
5689 have been created by C++. */
5690 for (field = TYPE_FIELDS (type);
5691 field && TREE_CODE (field) != FIELD_DECL;
5692 field = DECL_CHAIN (field))
5693 continue;
5694
5695 if (field == NULL)
5696 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5697
5698 /* Check that the first field is valid for returning in a register. */
5699
5700 /* ... Floats are not allowed */
5701 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5702 return true;
5703
5704 /* ... Aggregates that are not themselves valid for returning in
5705 a register are not allowed. */
5706 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5707 return true;
5708
5709 /* Now check the remaining fields, if any. Only bitfields are allowed,
5710 since they are not addressable. */
5711 for (field = DECL_CHAIN (field);
5712 field;
5713 field = DECL_CHAIN (field))
5714 {
5715 if (TREE_CODE (field) != FIELD_DECL)
5716 continue;
5717
5718 if (!DECL_BIT_FIELD_TYPE (field))
5719 return true;
5720 }
5721
5722 return false;
5723 }
5724
5725 if (TREE_CODE (type) == UNION_TYPE)
5726 {
5727 tree field;
5728
5729 /* Unions can be returned in registers if every element is
5730 integral, or can be returned in an integer register. */
5731 for (field = TYPE_FIELDS (type);
5732 field;
5733 field = DECL_CHAIN (field))
5734 {
5735 if (TREE_CODE (field) != FIELD_DECL)
5736 continue;
5737
5738 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5739 return true;
5740
5741 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5742 return true;
5743 }
5744
5745 return false;
5746 }
5747 #endif /* not ARM_WINCE */
5748
5749 /* Return all other types in memory. */
5750 return true;
5751 }
5752
5753 const struct pcs_attribute_arg
5754 {
5755 const char *arg;
5756 enum arm_pcs value;
5757 } pcs_attribute_args[] =
5758 {
5759 {"aapcs", ARM_PCS_AAPCS},
5760 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5761 #if 0
5762 /* We could recognize these, but changes would be needed elsewhere
5763 * to implement them. */
5764 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5765 {"atpcs", ARM_PCS_ATPCS},
5766 {"apcs", ARM_PCS_APCS},
5767 #endif
5768 {NULL, ARM_PCS_UNKNOWN}
5769 };
5770
5771 static enum arm_pcs
5772 arm_pcs_from_attribute (tree attr)
5773 {
5774 const struct pcs_attribute_arg *ptr;
5775 const char *arg;
5776
5777 /* Get the value of the argument. */
5778 if (TREE_VALUE (attr) == NULL_TREE
5779 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5780 return ARM_PCS_UNKNOWN;
5781
5782 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5783
5784 /* Check it against the list of known arguments. */
5785 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5786 if (streq (arg, ptr->arg))
5787 return ptr->value;
5788
5789   /* An unrecognized PCS variant.  */
5790 return ARM_PCS_UNKNOWN;
5791 }
5792
5793 /* Get the PCS variant to use for this call. TYPE is the function's type
5794    specification, DECL is the specific declaration.  DECL may be null if
5795 the call could be indirect or if this is a library call. */
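/* A hedged usage sketch (not from the original source): declaring

       double f (double) __attribute__ ((pcs ("aapcs")));

   should make calls to f use the base variant, i.e. pass and return the
   double in core registers even when the default for the translation
   unit is "aapcs-vfp".  */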
5796 static enum arm_pcs
5797 arm_get_pcs_model (const_tree type, const_tree decl)
5798 {
5799 bool user_convention = false;
5800 enum arm_pcs user_pcs = arm_pcs_default;
5801 tree attr;
5802
5803 gcc_assert (type);
5804
5805 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5806 if (attr)
5807 {
5808 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5809 user_convention = true;
5810 }
5811
5812 if (TARGET_AAPCS_BASED)
5813 {
5814 /* Detect varargs functions. These always use the base rules
5815 (no argument is ever a candidate for a co-processor
5816 register). */
5817 bool base_rules = stdarg_p (type);
5818
5819 if (user_convention)
5820 {
5821 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5822 sorry ("non-AAPCS derived PCS variant");
5823 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5824 error ("variadic functions must use the base AAPCS variant");
5825 }
5826
5827 if (base_rules)
5828 return ARM_PCS_AAPCS;
5829 else if (user_convention)
5830 return user_pcs;
5831 else if (decl && flag_unit_at_a_time)
5832 {
5833 /* Local functions never leak outside this compilation unit,
5834 so we are free to use whatever conventions are
5835 appropriate. */
5836 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5837 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5838 if (i && i->local)
5839 return ARM_PCS_AAPCS_LOCAL;
5840 }
5841 }
5842 else if (user_convention && user_pcs != arm_pcs_default)
5843 sorry ("PCS variant");
5844
5845 /* For everything else we use the target's default. */
5846 return arm_pcs_default;
5847 }
5848
5849
5850 static void
5851 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5852 const_tree fntype ATTRIBUTE_UNUSED,
5853 rtx libcall ATTRIBUTE_UNUSED,
5854 const_tree fndecl ATTRIBUTE_UNUSED)
5855 {
5856 /* Record the unallocated VFP registers. */
5857 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5858 pcum->aapcs_vfp_reg_alloc = 0;
5859 }
5860
5861 /* Walk down the type tree of TYPE counting consecutive base elements.
5862 If *MODEP is VOIDmode, then set it to the first valid floating point
5863 type. If a non-floating point type is found, or if a floating point
5864 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5865 otherwise return the count in the sub-tree. */
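/* Illustrative sketch (not from the original source): for

       struct pt { float x, y, z; };

   this returns 3 with *MODEP set to SFmode, i.e. a homogeneous aggregate
   of three single-precision elements; for struct { float f; int i; } it
   returns -1 because the int member does not match.  */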
5866 static int
5867 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5868 {
5869 machine_mode mode;
5870 HOST_WIDE_INT size;
5871
5872 switch (TREE_CODE (type))
5873 {
5874 case REAL_TYPE:
5875 mode = TYPE_MODE (type);
5876 if (mode != DFmode && mode != SFmode && mode != HFmode)
5877 return -1;
5878
5879 if (*modep == VOIDmode)
5880 *modep = mode;
5881
5882 if (*modep == mode)
5883 return 1;
5884
5885 break;
5886
5887 case COMPLEX_TYPE:
5888 mode = TYPE_MODE (TREE_TYPE (type));
5889 if (mode != DFmode && mode != SFmode)
5890 return -1;
5891
5892 if (*modep == VOIDmode)
5893 *modep = mode;
5894
5895 if (*modep == mode)
5896 return 2;
5897
5898 break;
5899
5900 case VECTOR_TYPE:
5901 /* Use V2SImode and V4SImode as representatives of all 64-bit
5902 and 128-bit vector types, whether or not those modes are
5903 supported with the present options. */
5904 size = int_size_in_bytes (type);
5905 switch (size)
5906 {
5907 case 8:
5908 mode = V2SImode;
5909 break;
5910 case 16:
5911 mode = V4SImode;
5912 break;
5913 default:
5914 return -1;
5915 }
5916
5917 if (*modep == VOIDmode)
5918 *modep = mode;
5919
5920 /* Vector modes are considered to be opaque: two vectors are
5921 equivalent for the purposes of being homogeneous aggregates
5922 if they are the same size. */
5923 if (*modep == mode)
5924 return 1;
5925
5926 break;
5927
5928 case ARRAY_TYPE:
5929 {
5930 int count;
5931 tree index = TYPE_DOMAIN (type);
5932
5933 /* Can't handle incomplete types nor sizes that are not
5934 fixed. */
5935 if (!COMPLETE_TYPE_P (type)
5936 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5937 return -1;
5938
5939 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5940 if (count == -1
5941 || !index
5942 || !TYPE_MAX_VALUE (index)
5943 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5944 || !TYPE_MIN_VALUE (index)
5945 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5946 || count < 0)
5947 return -1;
5948
5949 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5950 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5951
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type))
5954 != count * GET_MODE_BITSIZE (*modep))
5955 return -1;
5956
5957 return count;
5958 }
5959
5960 case RECORD_TYPE:
5961 {
5962 int count = 0;
5963 int sub_count;
5964 tree field;
5965
5966 /* Can't handle incomplete types nor sizes that are not
5967 fixed. */
5968 if (!COMPLETE_TYPE_P (type)
5969 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5970 return -1;
5971
5972 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5973 {
5974 if (TREE_CODE (field) != FIELD_DECL)
5975 continue;
5976
5977 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5978 if (sub_count < 0)
5979 return -1;
5980 count += sub_count;
5981 }
5982
5983 /* There must be no padding. */
5984 if (wi::to_wide (TYPE_SIZE (type))
5985 != count * GET_MODE_BITSIZE (*modep))
5986 return -1;
5987
5988 return count;
5989 }
5990
5991 case UNION_TYPE:
5992 case QUAL_UNION_TYPE:
5993 {
5994 /* These aren't very interesting except in a degenerate case. */
5995 int count = 0;
5996 int sub_count;
5997 tree field;
5998
5999 /* Can't handle incomplete types nor sizes that are not
6000 fixed. */
6001 if (!COMPLETE_TYPE_P (type)
6002 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6003 return -1;
6004
6005 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6006 {
6007 if (TREE_CODE (field) != FIELD_DECL)
6008 continue;
6009
6010 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6011 if (sub_count < 0)
6012 return -1;
6013 count = count > sub_count ? count : sub_count;
6014 }
6015
6016 /* There must be no padding. */
6017 if (wi::to_wide (TYPE_SIZE (type))
6018 != count * GET_MODE_BITSIZE (*modep))
6019 return -1;
6020
6021 return count;
6022 }
6023
6024 default:
6025 break;
6026 }
6027
6028 return -1;
6029 }
6030
6031 /* Return true if PCS_VARIANT should use VFP registers. */
6032 static bool
6033 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6034 {
6035 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6036 {
6037 static bool seen_thumb1_vfp = false;
6038
6039 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6040 {
6041 sorry ("Thumb-1 hard-float VFP ABI");
6042 /* sorry() is not immediately fatal, so only display this once. */
6043 seen_thumb1_vfp = true;
6044 }
6045
6046 return true;
6047 }
6048
6049 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6050 return false;
6051
6052   return (TARGET_32BIT && TARGET_HARD_FLOAT
6053 	  && (TARGET_VFP_DOUBLE || !is_double));
6054 }
6055
6056 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6057 suitable for passing or returning in VFP registers for the PCS
6058 variant selected. If it is, then *BASE_MODE is updated to contain
6059 a machine mode describing each element of the argument's type and
6060 *COUNT to hold the number of such elements. */
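/* Illustrative sketch (not from the original source): for a type such as

       struct dpair { double a, b; };

   *BASE_MODE becomes DFmode and *COUNT 2, so under the VFP variant the
   value can be passed or returned in d0-d1.  */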
6061 static bool
6062 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6063 machine_mode mode, const_tree type,
6064 machine_mode *base_mode, int *count)
6065 {
6066 machine_mode new_mode = VOIDmode;
6067
6068 /* If we have the type information, prefer that to working things
6069 out from the mode. */
6070 if (type)
6071 {
6072 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6073
6074 if (ag_count > 0 && ag_count <= 4)
6075 *count = ag_count;
6076 else
6077 return false;
6078 }
6079 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6080 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6081 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6082 {
6083 *count = 1;
6084 new_mode = mode;
6085 }
6086 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6087 {
6088 *count = 2;
6089 new_mode = (mode == DCmode ? DFmode : SFmode);
6090 }
6091 else
6092 return false;
6093
6094
6095 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6096 return false;
6097
6098 *base_mode = new_mode;
6099 return true;
6100 }
6101
6102 static bool
6103 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6104 machine_mode mode, const_tree type)
6105 {
6106 int count ATTRIBUTE_UNUSED;
6107 machine_mode ag_mode ATTRIBUTE_UNUSED;
6108
6109 if (!use_vfp_abi (pcs_variant, false))
6110 return false;
6111 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6112 &ag_mode, &count);
6113 }
6114
6115 static bool
6116 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6117 const_tree type)
6118 {
6119 if (!use_vfp_abi (pcum->pcs_variant, false))
6120 return false;
6121
6122 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6123 &pcum->aapcs_vfp_rmode,
6124 &pcum->aapcs_vfp_rcount);
6125 }
6126
6127 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6128 for the behaviour of this function. */
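/* Illustrative sketch (not from the original source): because allocation
   scans aapcs_vfp_regs_free from the bottom, the AAPCS back-filling rule
   falls out naturally; e.g. for f (float a, double b, float c) under the
   VFP variant, a lands in s0, b in d1 (s2/s3, the first aligned free
   pair), and c back-fills s1.  */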
6129
6130 static bool
6131 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6132 const_tree type ATTRIBUTE_UNUSED)
6133 {
6134 int rmode_size
6135 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6136 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6137 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6138 int regno;
6139
6140 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6141 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6142 {
6143 pcum->aapcs_vfp_reg_alloc = mask << regno;
6144 if (mode == BLKmode
6145 || (mode == TImode && ! TARGET_NEON)
6146 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6147 {
6148 int i;
6149 int rcount = pcum->aapcs_vfp_rcount;
6150 int rshift = shift;
6151 machine_mode rmode = pcum->aapcs_vfp_rmode;
6152 rtx par;
6153 if (!TARGET_NEON)
6154 {
6155 /* Avoid using unsupported vector modes. */
6156 if (rmode == V2SImode)
6157 rmode = DImode;
6158 else if (rmode == V4SImode)
6159 {
6160 rmode = DImode;
6161 rcount *= 2;
6162 rshift /= 2;
6163 }
6164 }
6165 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6166 for (i = 0; i < rcount; i++)
6167 {
6168 rtx tmp = gen_rtx_REG (rmode,
6169 FIRST_VFP_REGNUM + regno + i * rshift);
6170 tmp = gen_rtx_EXPR_LIST
6171 (VOIDmode, tmp,
6172 GEN_INT (i * GET_MODE_SIZE (rmode)));
6173 XVECEXP (par, 0, i) = tmp;
6174 }
6175
6176 pcum->aapcs_reg = par;
6177 }
6178 else
6179 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6180 return true;
6181 }
6182 return false;
6183 }
6184
6185 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6186 comment there for the behaviour of this function. */
6187
6188 static rtx
6189 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6190 machine_mode mode,
6191 const_tree type ATTRIBUTE_UNUSED)
6192 {
6193 if (!use_vfp_abi (pcs_variant, false))
6194 return NULL;
6195
6196 if (mode == BLKmode
6197 || (GET_MODE_CLASS (mode) == MODE_INT
6198 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6199 && !TARGET_NEON))
6200 {
6201 int count;
6202 machine_mode ag_mode;
6203 int i;
6204 rtx par;
6205 int shift;
6206
6207 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6208 &ag_mode, &count);
6209
6210 if (!TARGET_NEON)
6211 {
6212 if (ag_mode == V2SImode)
6213 ag_mode = DImode;
6214 else if (ag_mode == V4SImode)
6215 {
6216 ag_mode = DImode;
6217 count *= 2;
6218 }
6219 }
6220       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6221 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6222 for (i = 0; i < count; i++)
6223 {
6224 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6225 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6226 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6227 XVECEXP (par, 0, i) = tmp;
6228 }
6229
6230 return par;
6231 }
6232
6233 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6234 }
6235
6236 static void
6237 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6238 machine_mode mode ATTRIBUTE_UNUSED,
6239 const_tree type ATTRIBUTE_UNUSED)
6240 {
6241 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6242 pcum->aapcs_vfp_reg_alloc = 0;
6243 return;
6244 }
6245
6246 #define AAPCS_CP(X) \
6247 { \
6248 aapcs_ ## X ## _cum_init, \
6249 aapcs_ ## X ## _is_call_candidate, \
6250 aapcs_ ## X ## _allocate, \
6251 aapcs_ ## X ## _is_return_candidate, \
6252 aapcs_ ## X ## _allocate_return_reg, \
6253 aapcs_ ## X ## _advance \
6254 }
6255
6256 /* Table of co-processors that can be used to pass arguments in
6257    registers.  Ideally no argument should be a candidate for more than
6258 one co-processor table entry, but the table is processed in order
6259 and stops after the first match. If that entry then fails to put
6260 the argument into a co-processor register, the argument will go on
6261 the stack. */
6262 static struct
6263 {
6264 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6265 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6266
6267 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6268 BLKmode) is a candidate for this co-processor's registers; this
6269 function should ignore any position-dependent state in
6270 CUMULATIVE_ARGS and only use call-type dependent information. */
6271 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6272
6273 /* Return true if the argument does get a co-processor register; it
6274 should set aapcs_reg to an RTX of the register allocated as is
6275 required for a return from FUNCTION_ARG. */
6276 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6277
6278 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6279 be returned in this co-processor's registers. */
6280 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6281
6282 /* Allocate and return an RTX element to hold the return type of a call. This
6283 routine must not fail and will only be called if is_return_candidate
6284 returned true with the same parameters. */
6285 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6286
6287 /* Finish processing this argument and prepare to start processing
6288 the next one. */
6289 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6290 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6291 {
6292 AAPCS_CP(vfp)
6293 };
6294
6295 #undef AAPCS_CP
6296
6297 static int
6298 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6299 const_tree type)
6300 {
6301 int i;
6302
6303 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6304 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6305 return i;
6306
6307 return -1;
6308 }
6309
6310 static int
6311 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6312 {
6313 /* We aren't passed a decl, so we can't check that a call is local.
6314 However, it isn't clear that that would be a win anyway, since it
6315 might limit some tail-calling opportunities. */
6316 enum arm_pcs pcs_variant;
6317
6318 if (fntype)
6319 {
6320 const_tree fndecl = NULL_TREE;
6321
6322 if (TREE_CODE (fntype) == FUNCTION_DECL)
6323 {
6324 fndecl = fntype;
6325 fntype = TREE_TYPE (fntype);
6326 }
6327
6328 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6329 }
6330 else
6331 pcs_variant = arm_pcs_default;
6332
6333 if (pcs_variant != ARM_PCS_AAPCS)
6334 {
6335 int i;
6336
6337 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6338 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6339 TYPE_MODE (type),
6340 type))
6341 return i;
6342 }
6343 return -1;
6344 }
6345
6346 static rtx
6347 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6348 const_tree fntype)
6349 {
6350 /* We aren't passed a decl, so we can't check that a call is local.
6351 However, it isn't clear that that would be a win anyway, since it
6352 might limit some tail-calling opportunities. */
6353 enum arm_pcs pcs_variant;
6354 int unsignedp ATTRIBUTE_UNUSED;
6355
6356 if (fntype)
6357 {
6358 const_tree fndecl = NULL_TREE;
6359
6360 if (TREE_CODE (fntype) == FUNCTION_DECL)
6361 {
6362 fndecl = fntype;
6363 fntype = TREE_TYPE (fntype);
6364 }
6365
6366 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6367 }
6368 else
6369 pcs_variant = arm_pcs_default;
6370
6371 /* Promote integer types. */
6372 if (type && INTEGRAL_TYPE_P (type))
6373 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6374
6375 if (pcs_variant != ARM_PCS_AAPCS)
6376 {
6377 int i;
6378
6379 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6380 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6381 type))
6382 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6383 mode, type);
6384 }
6385
6386 /* Promotes small structs returned in a register to full-word size
6387 for big-endian AAPCS. */
6388 if (type && arm_return_in_msb (type))
6389 {
6390 HOST_WIDE_INT size = int_size_in_bytes (type);
6391 if (size % UNITS_PER_WORD != 0)
6392 {
6393 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6394 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6395 }
6396 }
6397
6398 return gen_rtx_REG (mode, R0_REGNUM);
6399 }
6400
6401 static rtx
6402 aapcs_libcall_value (machine_mode mode)
6403 {
6404 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6405 && GET_MODE_SIZE (mode) <= 4)
6406 mode = SImode;
6407
6408 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6409 }
6410
6411 /* Lay out a function argument using the AAPCS rules. The rule
6412 numbers referred to here are those in the AAPCS. */
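/* Illustrative sketch (not from the original source): under the base
   (integer-register) variant, for f (int a, double b) the int takes r0,
   rule C3 rounds the NCRN up from 1 to 2 for the doubleword-aligned
   double, and rule C4 then assigns it to the r2/r3 pair; r1 is left
   unused.  */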
6413 static void
6414 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6415 const_tree type, bool named)
6416 {
6417 int nregs, nregs2;
6418 int ncrn;
6419
6420 /* We only need to do this once per argument. */
6421 if (pcum->aapcs_arg_processed)
6422 return;
6423
6424 pcum->aapcs_arg_processed = true;
6425
6426 /* Special case: if named is false then we are handling an incoming
6427 anonymous argument which is on the stack. */
6428 if (!named)
6429 return;
6430
6431 /* Is this a potential co-processor register candidate? */
6432 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6433 {
6434 int slot = aapcs_select_call_coproc (pcum, mode, type);
6435 pcum->aapcs_cprc_slot = slot;
6436
6437 /* We don't have to apply any of the rules from part B of the
6438 preparation phase, these are handled elsewhere in the
6439 compiler. */
6440
6441 if (slot >= 0)
6442 {
6443 /* A Co-processor register candidate goes either in its own
6444 class of registers or on the stack. */
6445 if (!pcum->aapcs_cprc_failed[slot])
6446 {
6447 /* C1.cp - Try to allocate the argument to co-processor
6448 registers. */
6449 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6450 return;
6451
6452 /* C2.cp - Put the argument on the stack and note that we
6453 can't assign any more candidates in this slot. We also
6454 need to note that we have allocated stack space, so that
6455 we won't later try to split a non-cprc candidate between
6456 core registers and the stack. */
6457 pcum->aapcs_cprc_failed[slot] = true;
6458 pcum->can_split = false;
6459 }
6460
6461 /* We didn't get a register, so this argument goes on the
6462 stack. */
6463 gcc_assert (pcum->can_split == false);
6464 return;
6465 }
6466 }
6467
6468 /* C3 - For double-word aligned arguments, round the NCRN up to the
6469 next even number. */
6470 ncrn = pcum->aapcs_ncrn;
6471 if (ncrn & 1)
6472 {
6473 int res = arm_needs_doubleword_align (mode, type);
6474 /* Only warn during RTL expansion of call stmts, otherwise we would
6475 warn e.g. during gimplification even on functions that will be
6476 always inlined, and we'd warn multiple times. Don't warn when
6477 called in expand_function_start either, as we warn instead in
6478 arm_function_arg_boundary in that case. */
6479 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6480 inform (input_location, "parameter passing for argument of type "
6481 "%qT changed in GCC 7.1", type);
6482 else if (res > 0)
6483 ncrn++;
6484 }
6485
6486   nregs = ARM_NUM_REGS2 (mode, type);
6487
6488 /* Sigh, this test should really assert that nregs > 0, but a GCC
6489 extension allows empty structs and then gives them empty size; it
6490 then allows such a structure to be passed by value. For some of
6491 the code below we have to pretend that such an argument has
6492 non-zero size so that we 'locate' it correctly either in
6493 registers or on the stack. */
6494 gcc_assert (nregs >= 0);
6495
6496 nregs2 = nregs ? nregs : 1;
6497
6498 /* C4 - Argument fits entirely in core registers. */
6499 if (ncrn + nregs2 <= NUM_ARG_REGS)
6500 {
6501 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6502 pcum->aapcs_next_ncrn = ncrn + nregs;
6503 return;
6504 }
6505
6506 /* C5 - Some core registers left and there are no arguments already
6507 on the stack: split this argument between the remaining core
6508 registers and the stack. */
6509 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6510 {
6511 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6512 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6513 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6514 return;
6515 }
6516
6517 /* C6 - NCRN is set to 4. */
6518 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6519
6520   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6521 return;
6522 }
6523
6524 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6525 for a call to a function whose data type is FNTYPE.
6526 For a library call, FNTYPE is NULL. */
6527 void
6528 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6529 rtx libname,
6530 tree fndecl ATTRIBUTE_UNUSED)
6531 {
6532   /* Determine the PCS variant (calling convention) to use for this call.  */
6533 if (fntype)
6534 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6535 else
6536 pcum->pcs_variant = arm_pcs_default;
6537
6538 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6539 {
6540 if (arm_libcall_uses_aapcs_base (libname))
6541 pcum->pcs_variant = ARM_PCS_AAPCS;
6542
6543 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6544 pcum->aapcs_reg = NULL_RTX;
6545 pcum->aapcs_partial = 0;
6546 pcum->aapcs_arg_processed = false;
6547 pcum->aapcs_cprc_slot = -1;
6548 pcum->can_split = true;
6549
6550 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6551 {
6552 int i;
6553
6554 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6555 {
6556 pcum->aapcs_cprc_failed[i] = false;
6557 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6558 }
6559 }
6560 return;
6561 }
6562
6563 /* Legacy ABIs */
6564
6565 /* On the ARM, the offset starts at 0. */
6566 pcum->nregs = 0;
6567 pcum->iwmmxt_nregs = 0;
6568 pcum->can_split = true;
6569
6570 /* Varargs vectors are treated the same as long long.
6571 named_count avoids having to change the way arm handles 'named' */
6572 pcum->named_count = 0;
6573 pcum->nargs = 0;
6574
6575 if (TARGET_REALLY_IWMMXT && fntype)
6576 {
6577 tree fn_arg;
6578
6579 for (fn_arg = TYPE_ARG_TYPES (fntype);
6580 fn_arg;
6581 fn_arg = TREE_CHAIN (fn_arg))
6582 pcum->named_count += 1;
6583
6584 if (! pcum->named_count)
6585 pcum->named_count = INT_MAX;
6586 }
6587 }
6588
6589 /* Return 1 if double word alignment is required for argument passing.
6590 Return -1 if double word alignment used to be required for argument
6591 passing before PR77728 ABI fix, but is not required anymore.
6592    Return 0 if double word alignment is not required and wasn't required
6593 before either. */
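/* Illustrative sketch (not from the original source): a plain int has
   32-bit alignment and yields 0; a long long, or a struct whose first
   FIELD_DECL is a long long, has 64-bit alignment and yields 1, so the
   argument gets an even-numbered register pair or an 8-byte-aligned
   stack slot.  */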
6594 static int
6595 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6596 {
6597 if (!type)
6598 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6599
6600 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6601 if (!AGGREGATE_TYPE_P (type))
6602 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6603
6604 /* Array types: Use member alignment of element type. */
6605 if (TREE_CODE (type) == ARRAY_TYPE)
6606 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6607
6608 int ret = 0;
6609 /* Record/aggregate types: Use greatest member alignment of any member. */
6610 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6611 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6612 {
6613 if (TREE_CODE (field) == FIELD_DECL)
6614 return 1;
6615 else
6616 	/* Before the PR77728 fix, we also incorrectly considered
6617 	   other aggregate fields, such as VAR_DECLs and TYPE_DECLs.
6618 	   Make sure we can warn about that with -Wpsabi.  */
6619 ret = -1;
6620 }
6621
6622 return ret;
6623 }
6624
6625
6626 /* Determine where to put an argument to a function.
6627 Value is zero to push the argument on the stack,
6628 or a hard register in which to store the argument.
6629
6630 MODE is the argument's machine mode.
6631 TYPE is the data type of the argument (as a tree).
6632 This is null for libcalls where that information may
6633 not be available.
6634 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6635 the preceding args and about the function being called.
6636 NAMED is nonzero if this argument is a named parameter
6637 (otherwise it is an extra parameter matching an ellipsis).
6638
6639 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6640 other arguments are passed on the stack. If (NAMED == 0) (which happens
6641 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6642    defined), say it is passed on the stack (function_prologue will
6643    indeed arrange for it to be passed on the stack if necessary).  */
6644
6645 static rtx
6646 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6647 const_tree type, bool named)
6648 {
6649 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6650 int nregs;
6651
6652 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6653 a call insn (op3 of a call_value insn). */
6654 if (mode == VOIDmode)
6655 return const0_rtx;
6656
6657 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6658 {
6659 aapcs_layout_arg (pcum, mode, type, named);
6660 return pcum->aapcs_reg;
6661 }
6662
6663 /* Varargs vectors are treated the same as long long.
6664 named_count avoids having to change the way arm handles 'named' */
6665 if (TARGET_IWMMXT_ABI
6666 && arm_vector_mode_supported_p (mode)
6667 && pcum->named_count > pcum->nargs + 1)
6668 {
6669 if (pcum->iwmmxt_nregs <= 9)
6670 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6671 else
6672 {
6673 pcum->can_split = false;
6674 return NULL_RTX;
6675 }
6676 }
6677
6678 /* Put doubleword aligned quantities in even register pairs. */
6679 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6680 {
6681 int res = arm_needs_doubleword_align (mode, type);
6682 if (res < 0 && warn_psabi)
6683 inform (input_location, "parameter passing for argument of type "
6684 "%qT changed in GCC 7.1", type);
6685 else if (res > 0)
6686 pcum->nregs++;
6687 }
6688
6689 /* Only allow splitting an arg between regs and memory if all preceding
6690 args were allocated to regs. For args passed by reference we only count
6691 the reference pointer. */
6692 if (pcum->can_split)
6693 nregs = 1;
6694 else
6695 nregs = ARM_NUM_REGS2 (mode, type);
6696
6697 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6698 return NULL_RTX;
6699
6700 return gen_rtx_REG (mode, pcum->nregs);
6701 }
6702
6703 static unsigned int
6704 arm_function_arg_boundary (machine_mode mode, const_tree type)
6705 {
6706 if (!ARM_DOUBLEWORD_ALIGN)
6707 return PARM_BOUNDARY;
6708
6709 int res = arm_needs_doubleword_align (mode, type);
6710 if (res < 0 && warn_psabi)
6711 inform (input_location, "parameter passing for argument of type %qT "
6712 "changed in GCC 7.1", type);
6713
6714 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6715 }
6716
6717 static int
6718 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6719 tree type, bool named)
6720 {
6721 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6722 int nregs = pcum->nregs;
6723
6724 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6725 {
6726 aapcs_layout_arg (pcum, mode, type, named);
6727 return pcum->aapcs_partial;
6728 }
6729
6730 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6731 return 0;
6732
6733 if (NUM_ARG_REGS > nregs
6734 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6735 && pcum->can_split)
6736 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6737
6738 return 0;
6739 }
6740
6741 /* Update the data in PCUM to advance over an argument
6742 of mode MODE and data type TYPE.
6743 (TYPE is null for libcalls where that information may not be available.) */
6744
6745 static void
6746 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6747 const_tree type, bool named)
6748 {
6749 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6750
6751 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6752 {
6753 aapcs_layout_arg (pcum, mode, type, named);
6754
6755 if (pcum->aapcs_cprc_slot >= 0)
6756 {
6757 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6758 type);
6759 pcum->aapcs_cprc_slot = -1;
6760 }
6761
6762 /* Generic stuff. */
6763 pcum->aapcs_arg_processed = false;
6764 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6765 pcum->aapcs_reg = NULL_RTX;
6766 pcum->aapcs_partial = 0;
6767 }
6768 else
6769 {
6770 pcum->nargs += 1;
6771 if (arm_vector_mode_supported_p (mode)
6772 && pcum->named_count > pcum->nargs
6773 && TARGET_IWMMXT_ABI)
6774 pcum->iwmmxt_nregs += 1;
6775 else
6776 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6777 }
6778 }
6779
6780 /* Variable sized types are passed by reference. This is a GCC
6781 extension to the ARM ABI. */
6782
6783 static bool
6784 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6785 machine_mode mode ATTRIBUTE_UNUSED,
6786 const_tree type, bool named ATTRIBUTE_UNUSED)
6787 {
6788 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6789 }
6790 \f
6791 /* Encode the current state of the #pragma [no_]long_calls. */
6792 typedef enum
6793 {
6794 OFF, /* No #pragma [no_]long_calls is in effect. */
6795 LONG, /* #pragma long_calls is in effect. */
6796 SHORT /* #pragma no_long_calls is in effect. */
6797 } arm_pragma_enum;
6798
6799 static arm_pragma_enum arm_pragma_long_calls = OFF;
6800
6801 void
6802 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6803 {
6804 arm_pragma_long_calls = LONG;
6805 }
6806
6807 void
6808 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6809 {
6810 arm_pragma_long_calls = SHORT;
6811 }
6812
6813 void
6814 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6815 {
6816 arm_pragma_long_calls = OFF;
6817 }
6818 \f
6819 /* Handle an attribute requiring a FUNCTION_DECL;
6820 arguments as in struct attribute_spec.handler. */
6821 static tree
6822 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6823 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6824 {
6825 if (TREE_CODE (*node) != FUNCTION_DECL)
6826 {
6827 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6828 name);
6829 *no_add_attrs = true;
6830 }
6831
6832 return NULL_TREE;
6833 }
6834
6835 /* Handle an "interrupt" or "isr" attribute;
6836 arguments as in struct attribute_spec.handler. */
6837 static tree
6838 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6839 bool *no_add_attrs)
6840 {
6841 if (DECL_P (*node))
6842 {
6843 if (TREE_CODE (*node) != FUNCTION_DECL)
6844 {
6845 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6846 name);
6847 *no_add_attrs = true;
6848 }
6849 /* FIXME: the argument if any is checked for type attributes;
6850 should it be checked for decl ones? */
6851 }
6852 else
6853 {
6854 if (TREE_CODE (*node) == FUNCTION_TYPE
6855 || TREE_CODE (*node) == METHOD_TYPE)
6856 {
6857 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6858 {
6859 warning (OPT_Wattributes, "%qE attribute ignored",
6860 name);
6861 *no_add_attrs = true;
6862 }
6863 }
6864 else if (TREE_CODE (*node) == POINTER_TYPE
6865 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6866 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6867 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6868 {
6869 *node = build_variant_type_copy (*node);
6870 TREE_TYPE (*node) = build_type_attribute_variant
6871 (TREE_TYPE (*node),
6872 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6873 *no_add_attrs = true;
6874 }
6875 else
6876 {
6877 /* Possibly pass this attribute on from the type to a decl. */
6878 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6879 | (int) ATTR_FLAG_FUNCTION_NEXT
6880 | (int) ATTR_FLAG_ARRAY_NEXT))
6881 {
6882 *no_add_attrs = true;
6883 return tree_cons (name, args, NULL_TREE);
6884 }
6885 else
6886 {
6887 warning (OPT_Wattributes, "%qE attribute ignored",
6888 name);
6889 }
6890 }
6891 }
6892
6893 return NULL_TREE;
6894 }
6895
6896 /* Handle a "pcs" attribute; arguments as in struct
6897 attribute_spec.handler. */
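/* Illustrative usage (hypothetical user declaration; "aapcs" and
   "aapcs-vfp" are the strings arm_pcs_from_attribute understands):

     double f (double) __attribute__ ((pcs ("aapcs")));

   Anything else yields ARM_PCS_UNKNOWN and the attribute is dropped.  */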
6898 static tree
6899 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6900 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6901 {
6902 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6903 {
6904 warning (OPT_Wattributes, "%qE attribute ignored", name);
6905 *no_add_attrs = true;
6906 }
6907 return NULL_TREE;
6908 }
6909
6910 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6911 /* Handle the "notshared" attribute. This attribute is another way of
6912 requesting hidden visibility. ARM's compiler supports
6913 "__declspec(notshared)"; we support the same thing via an
6914 attribute. */
6915
6916 static tree
6917 arm_handle_notshared_attribute (tree *node,
6918 tree name ATTRIBUTE_UNUSED,
6919 tree args ATTRIBUTE_UNUSED,
6920 int flags ATTRIBUTE_UNUSED,
6921 bool *no_add_attrs)
6922 {
6923 tree decl = TYPE_NAME (*node);
6924
6925 if (decl)
6926 {
6927 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6928 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6929 *no_add_attrs = false;
6930 }
6931 return NULL_TREE;
6932 }
6933 #endif
6934
6935 /* This function returns true if a function with declaration FNDECL and type
6936 FNTYPE uses the stack to pass arguments or to return its value, and false
6937 otherwise. This is used for functions with the attributes
6938 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6939 diagnostic messages if the stack is used. NAME is the name of the attribute
6940 used. */
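/* For instance (illustrative, AAPCS base variant where r0-r3 carry the
   first four integer arguments):

     int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int);
       -- accepted, everything fits in registers
     int __attribute__ ((cmse_nonsecure_entry)) g (int, int, int, int, int);
       -- rejected, the fifth argument would be passed on the stack  */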
6941
6942 static bool
6943 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6944 {
6945 function_args_iterator args_iter;
6946 CUMULATIVE_ARGS args_so_far_v;
6947 cumulative_args_t args_so_far;
6948 bool first_param = true;
6949 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6950
6951 /* Error out if any argument is passed on the stack. */
6952 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6953 args_so_far = pack_cumulative_args (&args_so_far_v);
6954 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6955 {
6956 rtx arg_rtx;
6957 machine_mode arg_mode = TYPE_MODE (arg_type);
6958
6959 prev_arg_type = arg_type;
6960 if (VOID_TYPE_P (arg_type))
6961 continue;
6962
6963 if (!first_param)
6964 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6965 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6966 if (!arg_rtx
6967 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6968 {
6969 error ("%qE attribute not available to functions with arguments "
6970 "passed on the stack", name);
6971 return true;
6972 }
6973 first_param = false;
6974 }
6975
6976 /* Error out for variadic functions since we cannot control how many
6977 arguments will be passed and thus the stack could be used. stdarg_p () is
6978 not used for this check to avoid walking the argument list twice. */
6979 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6980 {
6981 error ("%qE attribute not available to functions with variable number "
6982 "of arguments", name);
6983 return true;
6984 }
6985
6986 /* Error out if return value is passed on the stack. */
6987 ret_type = TREE_TYPE (fntype);
6988 if (arm_return_in_memory (ret_type, fntype))
6989 {
6990 error ("%qE attribute not available to functions that return value on "
6991 "the stack", name);
6992 return true;
6993 }
6994 return false;
6995 }
6996
6997 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6998 function will check whether the attribute is allowed here and will add the
6999 attribute to the function declaration tree or otherwise issue a warning. */
7000
7001 static tree
7002 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7003 tree /* args */,
7004 int /* flags */,
7005 bool *no_add_attrs)
7006 {
7007 tree fndecl;
7008
7009 if (!use_cmse)
7010 {
7011 *no_add_attrs = true;
7012 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7013 name);
7014 return NULL_TREE;
7015 }
7016
7017 /* Ignore attribute for function types. */
7018 if (TREE_CODE (*node) != FUNCTION_DECL)
7019 {
7020 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7021 name);
7022 *no_add_attrs = true;
7023 return NULL_TREE;
7024 }
7025
7026 fndecl = *node;
7027
7028 /* Warn for static linkage functions. */
7029 if (!TREE_PUBLIC (fndecl))
7030 {
7031 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7032 "with static linkage", name);
7033 *no_add_attrs = true;
7034 return NULL_TREE;
7035 }
7036
7037 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7038 TREE_TYPE (fndecl));
7039 return NULL_TREE;
7040 }
7041
7042
7043 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7044 function will check whether the attribute is allowed here and will add the
7045 attribute to the function type tree or otherwise issue a diagnostic. The
7046 reason we check this at declaration time is to only allow the use of the
7047 attribute with declarations of function pointers and not function
7048 declarations. This function checks NODE is of the expected type and issues
7049 diagnostics otherwise using NAME. If it is not of the expected type
7050 *NO_ADD_ATTRS will be set to true. */
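/* A sketch of the intended usage (illustrative, hypothetical names):

     void (*ns_fp) (void) __attribute__ ((cmse_nonsecure_call));

   i.e. the attribute decorates a declaration whose type is (a pointer to)
   a function type; putting it directly on an ordinary function
   declaration is diagnosed and the attribute dropped.  */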
7051
7052 static tree
7053 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7054 tree /* args */,
7055 int /* flags */,
7056 bool *no_add_attrs)
7057 {
7058 tree decl = NULL_TREE, fntype = NULL_TREE;
7059 tree type;
7060
7061 if (!use_cmse)
7062 {
7063 *no_add_attrs = true;
7064 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7065 name);
7066 return NULL_TREE;
7067 }
7068
7069 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7070 {
7071 decl = *node;
7072 fntype = TREE_TYPE (decl);
7073 }
7074
7075 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7076 fntype = TREE_TYPE (fntype);
7077
7078 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7079 {
7080 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7081 "function pointer", name);
7082 *no_add_attrs = true;
7083 return NULL_TREE;
7084 }
7085
7086 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7087
7088 if (*no_add_attrs)
7089 return NULL_TREE;
7090
7091 /* Prevent trees being shared among function types with and without
7092 cmse_nonsecure_call attribute. */
7093 type = TREE_TYPE (decl);
7094
7095 type = build_distinct_type_copy (type);
7096 TREE_TYPE (decl) = type;
7097 fntype = type;
7098
7099 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7100 {
7101 type = fntype;
7102 fntype = TREE_TYPE (fntype);
7103 fntype = build_distinct_type_copy (fntype);
7104 TREE_TYPE (type) = fntype;
7105 }
7106
7107 /* Construct a type attribute and add it to the function type. */
7108 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7109 TYPE_ATTRIBUTES (fntype));
7110 TYPE_ATTRIBUTES (fntype) = attrs;
7111 return NULL_TREE;
7112 }
7113
7114 /* Return 0 if the attributes for two types are incompatible, 1 if they
7115 are compatible, and 2 if they are nearly compatible (which causes a
7116 warning to be generated). */
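/* For example (illustrative):

     extern void far_fn (void) __attribute__ ((long_call));
     void (*fp) (void) = far_fn;

   is flagged, because only one of the two function types carries a
   call-type attribute and this routine then reports them incompatible.  */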
7117 static int
7118 arm_comp_type_attributes (const_tree type1, const_tree type2)
7119 {
7120 int l1, l2, s1, s2;
7121
7122 /* Check for mismatch of non-default calling convention. */
7123 if (TREE_CODE (type1) != FUNCTION_TYPE)
7124 return 1;
7125
7126 /* Check for mismatched call attributes. */
7127 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7128 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7129 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7130 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7131
7132 /* Only bother to check if an attribute is defined. */
7133 if (l1 | l2 | s1 | s2)
7134 {
7135 /* If one type has an attribute, the other must have the same attribute. */
7136 if ((l1 != l2) || (s1 != s2))
7137 return 0;
7138
7139 /* Disallow mixed attributes. */
7140 if ((l1 & s2) || (l2 & s1))
7141 return 0;
7142 }
7143
7144 /* Check for mismatched ISR attribute. */
7145 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7146 if (! l1)
7147 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7148 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7149 if (! l2)
7150 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7151 if (l1 != l2)
7152 return 0;
7153
7154 l1 = lookup_attribute ("cmse_nonsecure_call",
7155 TYPE_ATTRIBUTES (type1)) != NULL;
7156 l2 = lookup_attribute ("cmse_nonsecure_call",
7157 TYPE_ATTRIBUTES (type2)) != NULL;
7158
7159 if (l1 != l2)
7160 return 0;
7161
7162 return 1;
7163 }
7164
7165 /* Assigns default attributes to newly defined type. This is used to
7166 set short_call/long_call attributes for function types of
7167 functions defined inside corresponding #pragma scopes. */
7168 static void
7169 arm_set_default_type_attributes (tree type)
7170 {
7171 /* Add __attribute__ ((long_call)) to all functions when inside
7172 #pragma long_calls, or __attribute__ ((short_call)) when inside
7173 #pragma no_long_calls. */
7174 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7175 {
7176 tree type_attr_list, attr_name;
7177 type_attr_list = TYPE_ATTRIBUTES (type);
7178
7179 if (arm_pragma_long_calls == LONG)
7180 attr_name = get_identifier ("long_call");
7181 else if (arm_pragma_long_calls == SHORT)
7182 attr_name = get_identifier ("short_call");
7183 else
7184 return;
7185
7186 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7187 TYPE_ATTRIBUTES (type) = type_attr_list;
7188 }
7189 }
7190 \f
7191 /* Return true if DECL is known to be linked into section SECTION. */
7192
7193 static bool
7194 arm_function_in_section_p (tree decl, section *section)
7195 {
7196 /* We can only be certain about the prevailing symbol definition. */
7197 if (!decl_binds_to_current_def_p (decl))
7198 return false;
7199
7200 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7201 if (!DECL_SECTION_NAME (decl))
7202 {
7203 /* Make sure that we will not create a unique section for DECL. */
7204 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7205 return false;
7206 }
7207
7208 return function_section (decl) == section;
7209 }
7210
7211 /* Return nonzero if a 32-bit "long_call" should be generated for
7212 a call from the current function to DECL. We generate a long_call
7213 if the function:
7214
7215 a. has an __attribute__ ((long_call))
7216 or b. is within the scope of a #pragma long_calls
7217 or c. the -mlong-calls command line switch has been specified
7218
7219 However we do not generate a long call if the function:
7220
7221 d. has an __attribute__ ((short_call))
7222 or e. is inside the scope of a #pragma no_long_calls
7223 or f. is defined in the same section as the current function. */
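/* Illustrative declarations for cases a. and d. above (hypothetical
   user code):

     void far_fn (void) __attribute__ ((long_call));    -- case a
     void near_fn (void) __attribute__ ((short_call));  -- case d

   Cases b. and e. come from the #pragma handlers earlier in this file,
   and case c. from the -mlong-calls option.  */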
7224
7225 bool
7226 arm_is_long_call_p (tree decl)
7227 {
7228 tree attrs;
7229
7230 if (!decl)
7231 return TARGET_LONG_CALLS;
7232
7233 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7234 if (lookup_attribute ("short_call", attrs))
7235 return false;
7236
7237 /* For "f", be conservative, and only cater for cases in which the
7238 whole of the current function is placed in the same section. */
7239 if (!flag_reorder_blocks_and_partition
7240 && TREE_CODE (decl) == FUNCTION_DECL
7241 && arm_function_in_section_p (decl, current_function_section ()))
7242 return false;
7243
7244 if (lookup_attribute ("long_call", attrs))
7245 return true;
7246
7247 return TARGET_LONG_CALLS;
7248 }
7249
7250 /* Return nonzero if it is ok to make a tail-call to DECL. */
7251 static bool
7252 arm_function_ok_for_sibcall (tree decl, tree exp)
7253 {
7254 unsigned long func_type;
7255
7256 if (cfun->machine->sibcall_blocked)
7257 return false;
7258
7259 /* Never tailcall something if we are generating code for Thumb-1. */
7260 if (TARGET_THUMB1)
7261 return false;
7262
7263 /* The PIC register is live on entry to VxWorks PLT entries, so we
7264 must make the call before restoring the PIC register. */
7265 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7266 return false;
7267
7268 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7269 may be used both as target of the call and base register for restoring
7270 the VFP registers. */
7271 if (TARGET_APCS_FRAME && TARGET_ARM
7272 && TARGET_HARD_FLOAT
7273 && decl && arm_is_long_call_p (decl))
7274 return false;
7275
7276 /* If we are interworking and the function is not declared static
7277 then we can't tail-call it unless we know that it exists in this
7278 compilation unit (since it might be a Thumb routine). */
7279 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7280 && !TREE_ASM_WRITTEN (decl))
7281 return false;
7282
7283 func_type = arm_current_func_type ();
7284 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7285 if (IS_INTERRUPT (func_type))
7286 return false;
7287
7288 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7289 generated for entry functions themselves. */
7290 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7291 return false;
7292
7293 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7294 this would complicate matters for later code generation. */
7295 if (TREE_CODE (exp) == CALL_EXPR)
7296 {
7297 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7298 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7299 return false;
7300 }
7301
7302 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7303 {
7304 /* Check that the return value locations are the same. For
7305 example that we aren't returning a value from the sibling in
7306 a VFP register but then need to transfer it to a core
7307 register. */
7308 rtx a, b;
7309 tree decl_or_type = decl;
7310
7311 /* If it is an indirect function pointer, get the function type. */
7312 if (!decl)
7313 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7314
7315 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7316 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7317 cfun->decl, false);
7318 if (!rtx_equal_p (a, b))
7319 return false;
7320 }
7321
7322 /* Never tailcall if function may be called with a misaligned SP. */
7323 if (IS_STACKALIGN (func_type))
7324 return false;
7325
7326 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7327 references should become a NOP. Don't convert such calls into
7328 sibling calls. */
7329 if (TARGET_AAPCS_BASED
7330 && arm_abi == ARM_ABI_AAPCS
7331 && decl
7332 && DECL_WEAK (decl))
7333 return false;
7334
7335 /* We cannot do a tailcall for an indirect call by descriptor if all the
7336 argument registers are used because the only register left to load the
7337 address is IP and it will already contain the static chain. */
7338 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7339 {
7340 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7341 CUMULATIVE_ARGS cum;
7342 cumulative_args_t cum_v;
7343
7344 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7345 cum_v = pack_cumulative_args (&cum);
7346
7347 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7348 {
7349 tree type = TREE_VALUE (t);
7350 if (!VOID_TYPE_P (type))
7351 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7352 }
7353
7354 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7355 return false;
7356 }
7357
7358 /* Everything else is ok. */
7359 return true;
7360 }
7361
7362 \f
7363 /* Addressing mode support functions. */
7364
7365 /* Return nonzero if X is a legitimate immediate operand when compiling
7366 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7367 int
7368 legitimate_pic_operand_p (rtx x)
7369 {
7370 if (GET_CODE (x) == SYMBOL_REF
7371 || (GET_CODE (x) == CONST
7372 && GET_CODE (XEXP (x, 0)) == PLUS
7373 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7374 return 0;
7375
7376 return 1;
7377 }
7378
7379 /* Record that the current function needs a PIC register. Initialize
7380 cfun->machine->pic_reg if we have not already done so. */
7381
7382 static void
7383 require_pic_register (void)
7384 {
7385 /* A lot of the logic here is made obscure by the fact that this
7386 routine gets called as part of the rtx cost estimation process.
7387 We don't want those calls to affect any assumptions about the real
7388 function; and further, we can't call entry_of_function() until we
7389 start the real expansion process. */
7390 if (!crtl->uses_pic_offset_table)
7391 {
7392 gcc_assert (can_create_pseudo_p ());
7393 if (arm_pic_register != INVALID_REGNUM
7394 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7395 {
7396 if (!cfun->machine->pic_reg)
7397 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7398
7399 /* Play games to avoid marking the function as needing pic
7400 if we are being called as part of the cost-estimation
7401 process. */
7402 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7403 crtl->uses_pic_offset_table = 1;
7404 }
7405 else
7406 {
7407 rtx_insn *seq, *insn;
7408
7409 if (!cfun->machine->pic_reg)
7410 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7411
7412 /* Play games to avoid marking the function as needing pic
7413 if we are being called as part of the cost-estimation
7414 process. */
7415 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7416 {
7417 crtl->uses_pic_offset_table = 1;
7418 start_sequence ();
7419
7420 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7421 && arm_pic_register > LAST_LO_REGNUM)
7422 emit_move_insn (cfun->machine->pic_reg,
7423 gen_rtx_REG (Pmode, arm_pic_register));
7424 else
7425 arm_load_pic_register (0UL);
7426
7427 seq = get_insns ();
7428 end_sequence ();
7429
7430 for (insn = seq; insn; insn = NEXT_INSN (insn))
7431 if (INSN_P (insn))
7432 INSN_LOCATION (insn) = prologue_location;
7433
7434 /* We can be called during expansion of PHI nodes, where
7435 we can't yet emit instructions directly in the final
7436 insn stream. Queue the insns on the entry edge, they will
7437 be committed after everything else is expanded. */
7438 insert_insn_on_edge (seq,
7439 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7440 }
7441 }
7442 }
7443 }
7444
7445 rtx
7446 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7447 {
7448 if (GET_CODE (orig) == SYMBOL_REF
7449 || GET_CODE (orig) == LABEL_REF)
7450 {
7451 if (reg == 0)
7452 {
7453 gcc_assert (can_create_pseudo_p ());
7454 reg = gen_reg_rtx (Pmode);
7455 }
7456
7457 /* VxWorks does not impose a fixed gap between segments; the run-time
7458 gap can be different from the object-file gap. We therefore can't
7459 use GOTOFF unless we are absolutely sure that the symbol is in the
7460 same segment as the GOT. Unfortunately, the flexibility of linker
7461 scripts means that we can't be sure of that in general, so assume
7462 that GOTOFF is never valid on VxWorks. */
7463 /* References to weak symbols cannot be resolved locally: they
7464 may be overridden by a non-weak definition at link time. */
7465 rtx_insn *insn;
7466 if ((GET_CODE (orig) == LABEL_REF
7467 || (GET_CODE (orig) == SYMBOL_REF
7468 && SYMBOL_REF_LOCAL_P (orig)
7469 && (SYMBOL_REF_DECL (orig)
7470 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7471 && NEED_GOT_RELOC
7472 && arm_pic_data_is_text_relative)
7473 insn = arm_pic_static_addr (orig, reg);
7474 else
7475 {
7476 rtx pat;
7477 rtx mem;
7478
7479 /* If this function doesn't have a pic register, create one now. */
7480 require_pic_register ();
7481
7482 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7483
7484 /* Make the MEM as close to a constant as possible. */
7485 mem = SET_SRC (pat);
7486 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7487 MEM_READONLY_P (mem) = 1;
7488 MEM_NOTRAP_P (mem) = 1;
7489
7490 insn = emit_insn (pat);
7491 }
7492
7493 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7494 by the loop optimizer. */
7495 set_unique_reg_note (insn, REG_EQUAL, orig);
7496
7497 return reg;
7498 }
7499 else if (GET_CODE (orig) == CONST)
7500 {
7501 rtx base, offset;
7502
7503 if (GET_CODE (XEXP (orig, 0)) == PLUS
7504 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7505 return orig;
7506
7507 /* Handle the case where we have: const (UNSPEC_TLS). */
7508 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7509 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7510 return orig;
7511
7512 /* Handle the case where we have:
7513 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7514 CONST_INT. */
7515 if (GET_CODE (XEXP (orig, 0)) == PLUS
7516 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7517 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7518 {
7519 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7520 return orig;
7521 }
7522
7523 if (reg == 0)
7524 {
7525 gcc_assert (can_create_pseudo_p ());
7526 reg = gen_reg_rtx (Pmode);
7527 }
7528
7529 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7530
7531 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7532 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7533 base == reg ? 0 : reg);
7534
7535 if (CONST_INT_P (offset))
7536 {
7537 /* The base register doesn't really matter, we only want to
7538 test the index for the appropriate mode. */
7539 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7540 {
7541 gcc_assert (can_create_pseudo_p ());
7542 offset = force_reg (Pmode, offset);
7543 }
7544
7545 if (CONST_INT_P (offset))
7546 return plus_constant (Pmode, base, INTVAL (offset));
7547 }
7548
7549 if (GET_MODE_SIZE (mode) > 4
7550 && (GET_MODE_CLASS (mode) == MODE_INT
7551 || TARGET_SOFT_FLOAT))
7552 {
7553 emit_insn (gen_addsi3 (reg, base, offset));
7554 return reg;
7555 }
7556
7557 return gen_rtx_PLUS (Pmode, base, offset);
7558 }
7559
7560 return orig;
7561 }
7562
7563
7564 /* Find a spare register to use during the prolog of a function. */
7565
7566 static int
7567 thumb_find_work_register (unsigned long pushed_regs_mask)
7568 {
7569 int reg;
7570
7571 /* Check the argument registers first as these are call-used. The
7572 register allocation order means that sometimes r3 might be used
7573 but earlier argument registers might not, so check them all. */
7574 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7575 if (!df_regs_ever_live_p (reg))
7576 return reg;
7577
7578 /* Before going on to check the call-saved registers we can try a couple
7579 more ways of deducing that r3 is available. The first is when we are
7580 pushing anonymous arguments onto the stack and we have less than 4
7581 registers worth of fixed arguments(*). In this case r3 will be part of
7582 the variable argument list and so we can be sure that it will be
7583 pushed right at the start of the function. Hence it will be available
7584 for the rest of the prologue.
7585 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7586 if (cfun->machine->uses_anonymous_args
7587 && crtl->args.pretend_args_size > 0)
7588 return LAST_ARG_REGNUM;
7589
7590 /* The other case is when we have fixed arguments but less than 4 registers
7591 worth. In this case r3 might be used in the body of the function, but
7592 it is not being used to convey an argument into the function. In theory
7593 we could just check crtl->args.size to see how many bytes are
7594 being passed in argument registers, but it seems that it is unreliable.
7595 Sometimes it will have the value 0 when in fact arguments are being
7596 passed. (See testcase execute/20021111-1.c for an example). So we also
7597 check the args_info.nregs field as well. The problem with this field is
7598 that it makes no allowances for arguments that are passed to the
7599 function but which are not used. Hence we could miss an opportunity
7600 when a function has an unused argument in r3. But it is better to be
7601 safe than to be sorry. */
7602 if (! cfun->machine->uses_anonymous_args
7603 && crtl->args.size >= 0
7604 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7605 && (TARGET_AAPCS_BASED
7606 ? crtl->args.info.aapcs_ncrn < 4
7607 : crtl->args.info.nregs < 4))
7608 return LAST_ARG_REGNUM;
7609
7610 /* Otherwise look for a call-saved register that is going to be pushed. */
7611 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7612 if (pushed_regs_mask & (1 << reg))
7613 return reg;
7614
7615 if (TARGET_THUMB2)
7616 {
7617 /* Thumb-2 can use high regs. */
7618 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7619 if (pushed_regs_mask & (1 << reg))
7620 return reg;
7621 }
7622 /* Something went wrong - thumb_compute_save_reg_mask()
7623 should have arranged for a suitable register to be pushed. */
7624 gcc_unreachable ();
7625 }
7626
7627 static GTY(()) int pic_labelno;
7628
7629 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7630 low register. */
7631
7632 void
7633 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7634 {
7635 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7636
7637 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7638 return;
7639
7640 gcc_assert (flag_pic);
7641
7642 pic_reg = cfun->machine->pic_reg;
7643 if (TARGET_VXWORKS_RTP)
7644 {
7645 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7646 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7647 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7648
7649 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7650
7651 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7652 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7653 }
7654 else
7655 {
7656 /* We use an UNSPEC rather than a LABEL_REF because this label
7657 never appears in the code stream. */
7658
7659 labelno = GEN_INT (pic_labelno++);
7660 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7661 l1 = gen_rtx_CONST (VOIDmode, l1);
7662
7663 /* On the ARM the PC register contains 'dot + 8' at the time of the
7664 addition, on the Thumb it is 'dot + 4'. */
7665 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
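/* Illustrative only: for TARGET_ARM the code eventually emitted looks
   roughly like
     ldr   rPIC, .LPICOFF
   .LPICn:
     add   rPIC, pc, rPIC
   with .LPICOFF holding _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8), which is
   why 8 (or 4 for Thumb) is folded into the offset here.  */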
7666 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7667 UNSPEC_GOTSYM_OFF);
7668 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7669
7670 if (TARGET_32BIT)
7671 {
7672 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7673 }
7674 else /* TARGET_THUMB1 */
7675 {
7676 if (arm_pic_register != INVALID_REGNUM
7677 && REGNO (pic_reg) > LAST_LO_REGNUM)
7678 {
7679 /* We will have pushed the pic register, so we should always be
7680 able to find a work register. */
7681 pic_tmp = gen_rtx_REG (SImode,
7682 thumb_find_work_register (saved_regs));
7683 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7684 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7685 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7686 }
7687 else if (arm_pic_register != INVALID_REGNUM
7688 && arm_pic_register > LAST_LO_REGNUM
7689 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7690 {
7691 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7692 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7693 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7694 }
7695 else
7696 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7697 }
7698 }
7699
7700 /* Need to emit this whether or not we obey regdecls,
7701 since setjmp/longjmp can cause life info to screw up. */
7702 emit_use (pic_reg);
7703 }
7704
7705 /* Generate code to load the address of a static var when flag_pic is set. */
7706 static rtx_insn *
7707 arm_pic_static_addr (rtx orig, rtx reg)
7708 {
7709 rtx l1, labelno, offset_rtx;
7710
7711 gcc_assert (flag_pic);
7712
7713 /* We use an UNSPEC rather than a LABEL_REF because this label
7714 never appears in the code stream. */
7715 labelno = GEN_INT (pic_labelno++);
7716 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7717 l1 = gen_rtx_CONST (VOIDmode, l1);
7718
7719 /* On the ARM the PC register contains 'dot + 8' at the time of the
7720 addition, on the Thumb it is 'dot + 4'. */
7721 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7722 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7723 UNSPEC_SYMBOL_OFFSET);
7724 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7725
7726 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7727 }
7728
7729 /* Return nonzero if X is valid as an ARM state addressing register. */
7730 static int
7731 arm_address_register_rtx_p (rtx x, int strict_p)
7732 {
7733 int regno;
7734
7735 if (!REG_P (x))
7736 return 0;
7737
7738 regno = REGNO (x);
7739
7740 if (strict_p)
7741 return ARM_REGNO_OK_FOR_BASE_P (regno);
7742
7743 return (regno <= LAST_ARM_REGNUM
7744 || regno >= FIRST_PSEUDO_REGISTER
7745 || regno == FRAME_POINTER_REGNUM
7746 || regno == ARG_POINTER_REGNUM);
7747 }
7748
7749 /* Return TRUE if this rtx is the difference of a symbol and a label,
7750 and will reduce to a PC-relative relocation in the object file.
7751 Expressions like this can be left alone when generating PIC, rather
7752 than forced through the GOT. */
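/* e.g. (illustrative) a pool constant of the form
     (minus (symbol_ref "sym") (label_ref Ln))
   assembles to the PC-relative difference  sym - .Ln  and needs no GOT
   entry.  */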
7753 static int
7754 pcrel_constant_p (rtx x)
7755 {
7756 if (GET_CODE (x) == MINUS)
7757 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7758
7759 return FALSE;
7760 }
7761
7762 /* Return true if X will surely end up in an index register after the next
7763 splitting pass. */
7764 static bool
7765 will_be_in_index_register (const_rtx x)
7766 {
7767 /* arm.md: calculate_pic_address will split this into a register. */
7768 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7769 }
7770
7771 /* Return nonzero if X is a valid ARM state address operand. */
7772 int
7773 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7774 int strict_p)
7775 {
7776 bool use_ldrd;
7777 enum rtx_code code = GET_CODE (x);
7778
7779 if (arm_address_register_rtx_p (x, strict_p))
7780 return 1;
7781
7782 use_ldrd = (TARGET_LDRD
7783 && (mode == DImode || mode == DFmode));
7784
7785 if (code == POST_INC || code == PRE_DEC
7786 || ((code == PRE_INC || code == POST_DEC)
7787 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7788 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7789
7790 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7791 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7792 && GET_CODE (XEXP (x, 1)) == PLUS
7793 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7794 {
7795 rtx addend = XEXP (XEXP (x, 1), 1);
7796
7797 /* Don't allow ldrd post increment by register because it's hard
7798 to fix up invalid register choices. */
7799 if (use_ldrd
7800 && GET_CODE (x) == POST_MODIFY
7801 && REG_P (addend))
7802 return 0;
7803
7804 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7805 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7806 }
7807
7808 /* After reload constants split into minipools will have addresses
7809 from a LABEL_REF. */
7810 else if (reload_completed
7811 && (code == LABEL_REF
7812 || (code == CONST
7813 && GET_CODE (XEXP (x, 0)) == PLUS
7814 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7815 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7816 return 1;
7817
7818 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7819 return 0;
7820
7821 else if (code == PLUS)
7822 {
7823 rtx xop0 = XEXP (x, 0);
7824 rtx xop1 = XEXP (x, 1);
7825
7826 return ((arm_address_register_rtx_p (xop0, strict_p)
7827 && ((CONST_INT_P (xop1)
7828 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7829 || (!strict_p && will_be_in_index_register (xop1))))
7830 || (arm_address_register_rtx_p (xop1, strict_p)
7831 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7832 }
7833
7834 #if 0
7835 /* Reload currently can't handle MINUS, so disable this for now */
7836 else if (GET_CODE (x) == MINUS)
7837 {
7838 rtx xop0 = XEXP (x, 0);
7839 rtx xop1 = XEXP (x, 1);
7840
7841 return (arm_address_register_rtx_p (xop0, strict_p)
7842 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7843 }
7844 #endif
7845
7846 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7847 && code == SYMBOL_REF
7848 && CONSTANT_POOL_ADDRESS_P (x)
7849 && ! (flag_pic
7850 && symbol_mentioned_p (get_pool_constant (x))
7851 && ! pcrel_constant_p (get_pool_constant (x))))
7852 return 1;
7853
7854 return 0;
7855 }
7856
7857 /* Return true if we can avoid creating a constant pool entry for x. */
7858 static bool
7859 can_avoid_literal_pool_for_label_p (rtx x)
7860 {
7861 /* Normally we can assign constant values to target registers without
7862 the help of a constant pool. But there are cases where we have to use a
7863 constant pool, such as:
7864 1) assigning a label to a register;
7865 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7866
7867 Constant pool access in format:
7868 (set (reg r0) (mem (symbol_ref (".LC0"))))
7869 will cause the use of the literal pool (later, in function arm_reorg).
7870 So here we mark such a format as invalid, and the compiler will then
7871 adjust it into:
7872 (set (reg r0) (symbol_ref (".LC0")))
7873 (set (reg r0) (mem (reg r0))).
7874 No extra register is required, and (mem (reg r0)) won't cause the use
7875 of literal pools. */
7876 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7877 && CONSTANT_POOL_ADDRESS_P (x))
7878 return 1;
7879 return 0;
7880 }
7881
7882
7883 /* Return nonzero if X is a valid Thumb-2 address operand. */
7884 static int
7885 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7886 {
7887 bool use_ldrd;
7888 enum rtx_code code = GET_CODE (x);
7889
7890 if (arm_address_register_rtx_p (x, strict_p))
7891 return 1;
7892
7893 use_ldrd = (TARGET_LDRD
7894 && (mode == DImode || mode == DFmode));
7895
7896 if (code == POST_INC || code == PRE_DEC
7897 || ((code == PRE_INC || code == POST_DEC)
7898 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7899 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7900
7901 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7902 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7903 && GET_CODE (XEXP (x, 1)) == PLUS
7904 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7905 {
7906 /* Thumb-2 only has autoincrement by constant. */
7907 rtx addend = XEXP (XEXP (x, 1), 1);
7908 HOST_WIDE_INT offset;
7909
7910 if (!CONST_INT_P (addend))
7911 return 0;
7912
7913 offset = INTVAL (addend);
7914 if (GET_MODE_SIZE (mode) <= 4)
7915 return (offset > -256 && offset < 256);
7916
7917 return (use_ldrd && offset > -1024 && offset < 1024
7918 && (offset & 3) == 0);
7919 }
7920
7921 /* After reload constants split into minipools will have addresses
7922 from a LABEL_REF. */
7923 else if (reload_completed
7924 && (code == LABEL_REF
7925 || (code == CONST
7926 && GET_CODE (XEXP (x, 0)) == PLUS
7927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7928 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7929 return 1;
7930
7931 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7932 return 0;
7933
7934 else if (code == PLUS)
7935 {
7936 rtx xop0 = XEXP (x, 0);
7937 rtx xop1 = XEXP (x, 1);
7938
7939 return ((arm_address_register_rtx_p (xop0, strict_p)
7940 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7941 || (!strict_p && will_be_in_index_register (xop1))))
7942 || (arm_address_register_rtx_p (xop1, strict_p)
7943 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7944 }
7945
7946 else if (can_avoid_literal_pool_for_label_p (x))
7947 return 0;
7948
7949 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7950 && code == SYMBOL_REF
7951 && CONSTANT_POOL_ADDRESS_P (x)
7952 && ! (flag_pic
7953 && symbol_mentioned_p (get_pool_constant (x))
7954 && ! pcrel_constant_p (get_pool_constant (x))))
7955 return 1;
7956
7957 return 0;
7958 }
7959
7960 /* Return nonzero if INDEX is valid for an address index operand in
7961 ARM state. */
7962 static int
7963 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7964 int strict_p)
7965 {
7966 HOST_WIDE_INT range;
7967 enum rtx_code code = GET_CODE (index);
7968
7969 /* Standard coprocessor addressing modes. */
7970 if (TARGET_HARD_FLOAT
7971 && (mode == SFmode || mode == DFmode))
7972 return (code == CONST_INT && INTVAL (index) < 1024
7973 && INTVAL (index) > -1024
7974 && (INTVAL (index) & 3) == 0);
7975
7976 /* For quad modes, we restrict the constant offset to be slightly less
7977 than what the instruction format permits. We do this because for
7978 quad mode moves, we will actually decompose them into two separate
7979 double-mode reads or writes. INDEX must therefore be a valid
7980 (double-mode) offset and so should INDEX+8. */
7981 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7982 return (code == CONST_INT
7983 && INTVAL (index) < 1016
7984 && INTVAL (index) > -1024
7985 && (INTVAL (index) & 3) == 0);
7986
7987 /* We have no such constraint on double mode offsets, so we permit the
7988 full range of the instruction format. */
7989 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7990 return (code == CONST_INT
7991 && INTVAL (index) < 1024
7992 && INTVAL (index) > -1024
7993 && (INTVAL (index) & 3) == 0);
7994
7995 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7996 return (code == CONST_INT
7997 && INTVAL (index) < 1024
7998 && INTVAL (index) > -1024
7999 && (INTVAL (index) & 3) == 0);
8000
8001 if (arm_address_register_rtx_p (index, strict_p)
8002 && (GET_MODE_SIZE (mode) <= 4))
8003 return 1;
8004
8005 if (mode == DImode || mode == DFmode)
8006 {
8007 if (code == CONST_INT)
8008 {
8009 HOST_WIDE_INT val = INTVAL (index);
8010
8011 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8012 If vldr is selected it uses arm_coproc_mem_operand. */
8013 if (TARGET_LDRD)
8014 return val > -256 && val < 256;
8015 else
8016 return val > -4096 && val < 4092;
8017 }
8018
8019 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8020 }
8021
8022 if (GET_MODE_SIZE (mode) <= 4
8023 && ! (arm_arch4
8024 && (mode == HImode
8025 || mode == HFmode
8026 || (mode == QImode && outer == SIGN_EXTEND))))
8027 {
8028 if (code == MULT)
8029 {
8030 rtx xiop0 = XEXP (index, 0);
8031 rtx xiop1 = XEXP (index, 1);
8032
8033 return ((arm_address_register_rtx_p (xiop0, strict_p)
8034 && power_of_two_operand (xiop1, SImode))
8035 || (arm_address_register_rtx_p (xiop1, strict_p)
8036 && power_of_two_operand (xiop0, SImode)));
8037 }
8038 else if (code == LSHIFTRT || code == ASHIFTRT
8039 || code == ASHIFT || code == ROTATERT)
8040 {
8041 rtx op = XEXP (index, 1);
8042
8043 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8044 && CONST_INT_P (op)
8045 && INTVAL (op) > 0
8046 && INTVAL (op) <= 31);
8047 }
8048 }
8049
8050 /* For ARM v4 we may be doing a sign-extend operation during the
8051 load. */
8052 if (arm_arch4)
8053 {
8054 if (mode == HImode
8055 || mode == HFmode
8056 || (outer == SIGN_EXTEND && mode == QImode))
8057 range = 256;
8058 else
8059 range = 4096;
8060 }
8061 else
8062 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8063
8064 return (code == CONST_INT
8065 && INTVAL (index) < range
8066 && INTVAL (index) > -range);
8067 }
8068
8069 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8070 index operand. i.e. 1, 2, 4 or 8. */
8071 static bool
8072 thumb2_index_mul_operand (rtx op)
8073 {
8074 HOST_WIDE_INT val;
8075
8076 if (!CONST_INT_P (op))
8077 return false;
8078
8079 val = INTVAL (op);
8080 return (val == 1 || val == 2 || val == 4 || val == 8);
8081 }
8082
8083 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8084 static int
8085 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8086 {
8087 enum rtx_code code = GET_CODE (index);
8088
8089 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8090 /* Standard coprocessor addressing modes. */
8091 if (TARGET_HARD_FLOAT
8092 && (mode == SFmode || mode == DFmode))
8093 return (code == CONST_INT && INTVAL (index) < 1024
8094 /* Thumb-2 allows only a > -256 index range for its core register
8095 load/stores. Since we allow SF/DF in core registers, we have
8096 to use the intersection between -256~4096 (core) and -1024~1024
8097 (coprocessor). */
8098 && INTVAL (index) > -256
8099 && (INTVAL (index) & 3) == 0);
8100
8101 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8102 {
8103 /* For DImode assume values will usually live in core regs
8104 and only allow LDRD addressing modes. */
8105 if (!TARGET_LDRD || mode != DImode)
8106 return (code == CONST_INT
8107 && INTVAL (index) < 1024
8108 && INTVAL (index) > -1024
8109 && (INTVAL (index) & 3) == 0);
8110 }
8111
8112 /* For quad modes, we restrict the constant offset to be slightly less
8113 than what the instruction format permits. We do this because for
8114 quad mode moves, we will actually decompose them into two separate
8115 double-mode reads or writes. INDEX must therefore be a valid
8116 (double-mode) offset and so should INDEX+8. */
8117 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8118 return (code == CONST_INT
8119 && INTVAL (index) < 1016
8120 && INTVAL (index) > -1024
8121 && (INTVAL (index) & 3) == 0);
8122
8123 /* We have no such constraint on double mode offsets, so we permit the
8124 full range of the instruction format. */
8125 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8126 return (code == CONST_INT
8127 && INTVAL (index) < 1024
8128 && INTVAL (index) > -1024
8129 && (INTVAL (index) & 3) == 0);
8130
8131 if (arm_address_register_rtx_p (index, strict_p)
8132 && (GET_MODE_SIZE (mode) <= 4))
8133 return 1;
8134
8135 if (mode == DImode || mode == DFmode)
8136 {
8137 if (code == CONST_INT)
8138 {
8139 HOST_WIDE_INT val = INTVAL (index);
8140 /* Thumb-2 ldrd only has reg+const addressing modes.
8141 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8142 If vldr is selected it uses arm_coproc_mem_operand. */
8143 if (TARGET_LDRD)
8144 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8145 else
8146 return IN_RANGE (val, -255, 4095 - 4);
8147 }
8148 else
8149 return 0;
8150 }
8151
8152 if (code == MULT)
8153 {
8154 rtx xiop0 = XEXP (index, 0);
8155 rtx xiop1 = XEXP (index, 1);
8156
8157 return ((arm_address_register_rtx_p (xiop0, strict_p)
8158 && thumb2_index_mul_operand (xiop1))
8159 || (arm_address_register_rtx_p (xiop1, strict_p)
8160 && thumb2_index_mul_operand (xiop0)));
8161 }
8162 else if (code == ASHIFT)
8163 {
8164 rtx op = XEXP (index, 1);
8165
8166 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8167 && CONST_INT_P (op)
8168 && INTVAL (op) > 0
8169 && INTVAL (op) <= 3);
8170 }
8171
8172 return (code == CONST_INT
8173 && INTVAL (index) < 4096
8174 && INTVAL (index) > -256);
8175 }
8176
8177 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8178 static int
8179 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8180 {
8181 int regno;
8182
8183 if (!REG_P (x))
8184 return 0;
8185
8186 regno = REGNO (x);
8187
8188 if (strict_p)
8189 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8190
8191 return (regno <= LAST_LO_REGNUM
8192 || regno > LAST_VIRTUAL_REGISTER
8193 || regno == FRAME_POINTER_REGNUM
8194 || (GET_MODE_SIZE (mode) >= 4
8195 && (regno == STACK_POINTER_REGNUM
8196 || regno >= FIRST_PSEUDO_REGISTER
8197 || x == hard_frame_pointer_rtx
8198 || x == arg_pointer_rtx)));
8199 }
8200
8201 /* Return nonzero if x is a legitimate index register. This is the case
8202 for any base register that can access a QImode object. */
8203 inline static int
8204 thumb1_index_register_rtx_p (rtx x, int strict_p)
8205 {
8206 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8207 }
8208
8209 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8210
8211 The AP may be eliminated to either the SP or the FP, so we use the
8212 least common denominator, e.g. SImode, and offsets from 0 to 64.
8213
8214 ??? Verify whether the above is the right approach.
8215
8216 ??? Also, the FP may be eliminated to the SP, so perhaps that
8217 needs special handling also.
8218
8219 ??? Look at how the mips16 port solves this problem. It probably uses
8220 better ways to solve some of these problems.
8221
8222 Although it is not incorrect, we don't accept QImode and HImode
8223 addresses based on the frame pointer or arg pointer until the
8224 reload pass starts. This is so that eliminating such addresses
8225 into stack based ones won't produce impossible code. */
8226 int
8227 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8228 {
8229 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8230 return 0;
8231
8232 /* ??? Not clear if this is right. Experiment. */
8233 if (GET_MODE_SIZE (mode) < 4
8234 && !(reload_in_progress || reload_completed)
8235 && (reg_mentioned_p (frame_pointer_rtx, x)
8236 || reg_mentioned_p (arg_pointer_rtx, x)
8237 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8238 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8239 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8240 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8241 return 0;
8242
8243 /* Accept any base register. SP only in SImode or larger. */
8244 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8245 return 1;
8246
8247 /* This is PC relative data before arm_reorg runs. */
8248 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8249 && GET_CODE (x) == SYMBOL_REF
8250 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8251 return 1;
8252
8253 /* This is PC relative data after arm_reorg runs. */
8254 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8255 && reload_completed
8256 && (GET_CODE (x) == LABEL_REF
8257 || (GET_CODE (x) == CONST
8258 && GET_CODE (XEXP (x, 0)) == PLUS
8259 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8260 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8261 return 1;
8262
8263 /* Post-inc indexing only supported for SImode and larger. */
8264 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8265 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8266 return 1;
8267
8268 else if (GET_CODE (x) == PLUS)
8269 {
8270 /* REG+REG address can be any two index registers. */
8271 /* We disallow FRAME+REG addressing since we know that FRAME
8272 will be replaced with STACK, and SP relative addressing only
8273 permits SP+OFFSET. */
8274 if (GET_MODE_SIZE (mode) <= 4
8275 && XEXP (x, 0) != frame_pointer_rtx
8276 && XEXP (x, 1) != frame_pointer_rtx
8277 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8278 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8279 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8280 return 1;
8281
8282 /* REG+const has 5-7 bit offset for non-SP registers. */
8283 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8284 || XEXP (x, 0) == arg_pointer_rtx)
8285 && CONST_INT_P (XEXP (x, 1))
8286 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8287 return 1;
8288
8289 /* REG+const has 10-bit offset for SP, but only SImode and
8290 larger are supported. */
8291 /* ??? Should probably check for DI/DFmode overflow here
8292 just like GO_IF_LEGITIMATE_OFFSET does. */
8293 else if (REG_P (XEXP (x, 0))
8294 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8295 && GET_MODE_SIZE (mode) >= 4
8296 && CONST_INT_P (XEXP (x, 1))
8297 && INTVAL (XEXP (x, 1)) >= 0
8298 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8299 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8300 return 1;
8301
8302 else if (REG_P (XEXP (x, 0))
8303 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8304 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8305 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8306 && REGNO (XEXP (x, 0))
8307 <= LAST_VIRTUAL_POINTER_REGISTER))
8308 && GET_MODE_SIZE (mode) >= 4
8309 && CONST_INT_P (XEXP (x, 1))
8310 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8311 return 1;
8312 }
8313
8314 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8315 && GET_MODE_SIZE (mode) == 4
8316 && GET_CODE (x) == SYMBOL_REF
8317 && CONSTANT_POOL_ADDRESS_P (x)
8318 && ! (flag_pic
8319 && symbol_mentioned_p (get_pool_constant (x))
8320 && ! pcrel_constant_p (get_pool_constant (x))))
8321 return 1;
8322
8323 return 0;
8324 }
8325
8326 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8327 instruction of mode MODE. */
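/* For instance (illustrative): this matches the Thumb-1 immediate forms
   ldrb/strb #0..#31, ldrh/strh #0..#62 in steps of 2, and ldr/str
   #0..#124 in steps of 4.  */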
8328 int
8329 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8330 {
8331 switch (GET_MODE_SIZE (mode))
8332 {
8333 case 1:
8334 return val >= 0 && val < 32;
8335
8336 case 2:
8337 return val >= 0 && val < 64 && (val & 1) == 0;
8338
8339 default:
8340 return (val >= 0
8341 && (val + GET_MODE_SIZE (mode)) <= 128
8342 && (val & 3) == 0);
8343 }
8344 }
8345
8346 bool
8347 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8348 {
8349 if (TARGET_ARM)
8350 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8351 else if (TARGET_THUMB2)
8352 return thumb2_legitimate_address_p (mode, x, strict_p);
8353 else /* if (TARGET_THUMB1) */
8354 return thumb1_legitimate_address_p (mode, x, strict_p);
8355 }
8356
8357 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8358
8359 Given an rtx X being reloaded into a reg required to be
8360 in class CLASS, return the class of reg to actually use.
8361 In general this is just CLASS, but for the Thumb core registers and
8362 immediate constants we prefer a LO_REGS class or a subset. */
8363
8364 static reg_class_t
8365 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8366 {
8367 if (TARGET_32BIT)
8368 return rclass;
8369 else
8370 {
8371 if (rclass == GENERAL_REGS)
8372 return LO_REGS;
8373 else
8374 return rclass;
8375 }
8376 }
8377
8378 /* Build the SYMBOL_REF for __tls_get_addr. */
8379
8380 static GTY(()) rtx tls_get_addr_libfunc;
8381
8382 static rtx
8383 get_tls_get_addr (void)
8384 {
8385 if (!tls_get_addr_libfunc)
8386 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8387 return tls_get_addr_libfunc;
8388 }
8389
8390 rtx
8391 arm_load_tp (rtx target)
8392 {
8393 if (!target)
8394 target = gen_reg_rtx (SImode);
8395
8396 if (TARGET_HARD_TP)
8397 {
8398 /* Can return in any reg. */
8399 emit_insn (gen_load_tp_hard (target));
8400 }
8401 else
8402 {
8403 /* Always returned in r0. Immediately copy the result into a pseudo,
8404 otherwise other uses of r0 (e.g. setting up function arguments) may
8405 clobber the value. */
8406
8407 rtx tmp;
8408
8409 emit_insn (gen_load_tp_soft ());
8410
8411 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8412 emit_move_insn (target, tmp);
8413 }
8414 return target;
8415 }
8416
8417 static rtx
8418 load_tls_operand (rtx x, rtx reg)
8419 {
8420 rtx tmp;
8421
8422 if (reg == NULL_RTX)
8423 reg = gen_reg_rtx (SImode);
8424
8425 tmp = gen_rtx_CONST (SImode, x);
8426
8427 emit_move_insn (reg, tmp);
8428
8429 return reg;
8430 }
8431
8432 static rtx_insn *
8433 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8434 {
8435 rtx label, labelno, sum;
8436
8437 gcc_assert (reloc != TLS_DESCSEQ);
8438 start_sequence ();
8439
8440 labelno = GEN_INT (pic_labelno++);
8441 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8442 label = gen_rtx_CONST (VOIDmode, label);
8443
8444 sum = gen_rtx_UNSPEC (Pmode,
8445 gen_rtvec (4, x, GEN_INT (reloc), label,
8446 GEN_INT (TARGET_ARM ? 8 : 4)),
8447 UNSPEC_TLS);
8448 reg = load_tls_operand (sum, reg);
8449
8450 if (TARGET_ARM)
8451 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8452 else
8453 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8454
8455 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8456 LCT_PURE, /* LCT_CONST? */
8457 Pmode, reg, Pmode);
8458
8459 rtx_insn *insns = get_insns ();
8460 end_sequence ();
8461
8462 return insns;
8463 }
8464
8465 static rtx
8466 arm_tls_descseq_addr (rtx x, rtx reg)
8467 {
8468 rtx labelno = GEN_INT (pic_labelno++);
8469 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8470 rtx sum = gen_rtx_UNSPEC (Pmode,
8471 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8472 gen_rtx_CONST (VOIDmode, label),
8473 GEN_INT (!TARGET_ARM)),
8474 UNSPEC_TLS);
8475 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8476
8477 emit_insn (gen_tlscall (x, labelno));
8478 if (!reg)
8479 reg = gen_reg_rtx (SImode);
8480 else
8481 gcc_assert (REGNO (reg) != R0_REGNUM);
8482
8483 emit_move_insn (reg, reg0);
8484
8485 return reg;
8486 }
8487
8488 rtx
8489 legitimize_tls_address (rtx x, rtx reg)
8490 {
8491 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8492 rtx_insn *insns;
8493 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8494
8495 switch (model)
8496 {
8497 case TLS_MODEL_GLOBAL_DYNAMIC:
8498 if (TARGET_GNU2_TLS)
8499 {
8500 reg = arm_tls_descseq_addr (x, reg);
8501
8502 tp = arm_load_tp (NULL_RTX);
8503
8504 dest = gen_rtx_PLUS (Pmode, tp, reg);
8505 }
8506 else
8507 {
8508 /* Original scheme */
8509 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8510 dest = gen_reg_rtx (Pmode);
8511 emit_libcall_block (insns, dest, ret, x);
8512 }
8513 return dest;
8514
8515 case TLS_MODEL_LOCAL_DYNAMIC:
8516 if (TARGET_GNU2_TLS)
8517 {
8518 reg = arm_tls_descseq_addr (x, reg);
8519
8520 tp = arm_load_tp (NULL_RTX);
8521
8522 dest = gen_rtx_PLUS (Pmode, tp, reg);
8523 }
8524 else
8525 {
8526 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8527
8528 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8529 share the LDM result with other LD model accesses. */
8530 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8531 UNSPEC_TLS);
8532 dest = gen_reg_rtx (Pmode);
8533 emit_libcall_block (insns, dest, ret, eqv);
8534
8535 /* Load the addend. */
8536 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8537 GEN_INT (TLS_LDO32)),
8538 UNSPEC_TLS);
8539 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8540 dest = gen_rtx_PLUS (Pmode, dest, addend);
8541 }
8542 return dest;
8543
8544 case TLS_MODEL_INITIAL_EXEC:
8545 labelno = GEN_INT (pic_labelno++);
8546 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8547 label = gen_rtx_CONST (VOIDmode, label);
8548 sum = gen_rtx_UNSPEC (Pmode,
8549 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8550 GEN_INT (TARGET_ARM ? 8 : 4)),
8551 UNSPEC_TLS);
8552 reg = load_tls_operand (sum, reg);
8553
8554 if (TARGET_ARM)
8555 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8556 else if (TARGET_THUMB2)
8557 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8558 else
8559 {
8560 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8561 emit_move_insn (reg, gen_const_mem (SImode, reg));
8562 }
8563
8564 tp = arm_load_tp (NULL_RTX);
8565
8566 return gen_rtx_PLUS (Pmode, tp, reg);
8567
8568 case TLS_MODEL_LOCAL_EXEC:
8569 tp = arm_load_tp (NULL_RTX);
8570
8571 reg = gen_rtx_UNSPEC (Pmode,
8572 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8573 UNSPEC_TLS);
8574 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8575
8576 return gen_rtx_PLUS (Pmode, tp, reg);
8577
8578 default:
8579 abort ();
8580 }
8581 }
8582
8583 /* Try machine-dependent ways of modifying an illegitimate address
8584 to be legitimate. If we find one, return the new, valid address. */
8585 rtx
8586 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8587 {
8588 if (arm_tls_referenced_p (x))
8589 {
8590 rtx addend = NULL;
8591
8592 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8593 {
8594 addend = XEXP (XEXP (x, 0), 1);
8595 x = XEXP (XEXP (x, 0), 0);
8596 }
8597
8598 if (GET_CODE (x) != SYMBOL_REF)
8599 return x;
8600
8601 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8602
8603 x = legitimize_tls_address (x, NULL_RTX);
8604
8605 if (addend)
8606 {
8607 x = gen_rtx_PLUS (SImode, x, addend);
8608 orig_x = x;
8609 }
8610 else
8611 return x;
8612 }
8613
8614 if (!TARGET_ARM)
8615 {
8616 /* TODO: legitimize_address for Thumb2. */
8617 if (TARGET_THUMB2)
8618 return x;
8619 return thumb_legitimize_address (x, orig_x, mode);
8620 }
8621
8622 if (GET_CODE (x) == PLUS)
8623 {
8624 rtx xop0 = XEXP (x, 0);
8625 rtx xop1 = XEXP (x, 1);
8626
8627 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8628 xop0 = force_reg (SImode, xop0);
8629
8630 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8631 && !symbol_mentioned_p (xop1))
8632 xop1 = force_reg (SImode, xop1);
8633
8634 if (ARM_BASE_REGISTER_RTX_P (xop0)
8635 && CONST_INT_P (xop1))
8636 {
8637 HOST_WIDE_INT n, low_n;
8638 rtx base_reg, val;
8639 n = INTVAL (xop1);
8640
8641 /* VFP addressing modes actually allow greater offsets, but for
8642 now we just stick with the lowest common denominator. */
8643 if (mode == DImode || mode == DFmode)
8644 {
8645 low_n = n & 0x0f;
8646 n &= ~0x0f;
8647 if (low_n > 4)
8648 {
8649 n += 16;
8650 low_n -= 16;
8651 }
8652 }
8653 else
8654 {
8655 low_n = ((mode) == TImode ? 0
8656 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8657 n -= low_n;
8658 }
8659
8660 base_reg = gen_reg_rtx (SImode);
8661 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8662 emit_move_insn (base_reg, val);
8663 x = plus_constant (Pmode, base_reg, low_n);
8664 }
8665 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8666 x = gen_rtx_PLUS (SImode, xop0, xop1);
8667 }
8668
8669 /* XXX We don't allow MINUS any more -- see comment in
8670 arm_legitimate_address_outer_p (). */
8671 else if (GET_CODE (x) == MINUS)
8672 {
8673 rtx xop0 = XEXP (x, 0);
8674 rtx xop1 = XEXP (x, 1);
8675
8676 if (CONSTANT_P (xop0))
8677 xop0 = force_reg (SImode, xop0);
8678
8679 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8680 xop1 = force_reg (SImode, xop1);
8681
8682 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8683 x = gen_rtx_MINUS (SImode, xop0, xop1);
8684 }
8685
8686 /* Make sure to take full advantage of the pre-indexed addressing mode
8687 with absolute addresses which often allows for the base register to
8688 be factorized for multiple adjacent memory references, and it might
8689 even allow for the minipool to be avoided entirely. */
8690 else if (CONST_INT_P (x) && optimize > 0)
8691 {
8692 unsigned int bits;
8693 HOST_WIDE_INT mask, base, index;
8694 rtx base_reg;
8695
8696 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8697 use an 8-bit index. So let's use a 12-bit index for SImode only and
8698 hope that arm_gen_constant will enable ldrb to use more bits. */
8699 bits = (mode == SImode) ? 12 : 8;
8700 mask = (1 << bits) - 1;
8701 base = INTVAL (x) & ~mask;
8702 index = INTVAL (x) & mask;
8703 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8704 {
8705 /* It'll most probably be more efficient to generate the base
8706 with more bits set and use a negative index instead. */
8707 base |= mask;
8708 index -= mask;
8709 }
8710 base_reg = force_reg (SImode, GEN_INT (base));
8711 x = plus_constant (Pmode, base_reg, index);
8712 }
8713
8714 if (flag_pic)
8715 {
8716 /* We need to find and carefully transform any SYMBOL and LABEL
8717 references; so go back to the original address expression. */
8718 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8719
8720 if (new_x != orig_x)
8721 x = new_x;
8722 }
8723
8724 return x;
8725 }
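
/* Illustrative sketch only: the absolute-address splitting performed
   above, restated on plain integers with hypothetical names (the real
   code operates on RTL and uses bit_count/GEN_INT).  BITS is 12 for
   SImode accesses and 8 otherwise, as in the comment above.  */
static inline void
split_absolute_address_example (unsigned long long addr, int bits,
				long long *base, long long *index)
{
  long long mask = (1LL << bits) - 1;

  *base = (long long) (addr & ~(unsigned long long) mask);
  *index = (long long) (addr & (unsigned long long) mask);

  /* Population count of the low 32 bits of the candidate base.  */
  int popcount = 0;
  for (unsigned long long t = (unsigned long long) *base & 0xffffffffULL;
       t != 0; t &= t - 1)
    popcount++;

  if (popcount > (32 - bits) / 2)
    {
      /* Building the base would need many set bits; round it up and use a
	 negative index instead.  E.g. addr = 0xfffff004, bits = 12: the
	 base becomes 0xffffffff (cheap to materialise) and the index
	 becomes -4091, rather than base = 0xfffff000, index = 4.  */
      *base |= mask;
      *index -= mask;
    }
}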
8726
8727
8728 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8729 to be legitimate. If we find one, return the new, valid address. */
8730 rtx
8731 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8732 {
8733 if (GET_CODE (x) == PLUS
8734 && CONST_INT_P (XEXP (x, 1))
8735 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8736 || INTVAL (XEXP (x, 1)) < 0))
8737 {
8738 rtx xop0 = XEXP (x, 0);
8739 rtx xop1 = XEXP (x, 1);
8740 HOST_WIDE_INT offset = INTVAL (xop1);
8741
8742 /* Try to fold the offset into a biasing of the base register and
8743 then offsetting that. Don't do this when optimizing for space
8744 since it can cause too many CSEs. */
8745 if (optimize_size && offset >= 0
8746 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8747 {
8748 HOST_WIDE_INT delta;
8749
8750 if (offset >= 256)
8751 delta = offset - (256 - GET_MODE_SIZE (mode));
8752 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8753 delta = 31 * GET_MODE_SIZE (mode);
8754 else
8755 delta = offset & (~31 * GET_MODE_SIZE (mode));
8756
8757 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8758 NULL_RTX);
8759 x = plus_constant (Pmode, xop0, delta);
8760 }
8761 else if (offset < 0 && offset > -256)
8762 /* Small negative offsets are best done with a subtract before the
8763 dereference; forcing these into a register normally takes two
8764 instructions. */
8765 x = force_operand (x, NULL_RTX);
8766 else
8767 {
8768 /* For the remaining cases, force the constant into a register. */
8769 xop1 = force_reg (SImode, xop1);
8770 x = gen_rtx_PLUS (SImode, xop0, xop1);
8771 }
8772 }
8773 else if (GET_CODE (x) == PLUS
8774 && s_register_operand (XEXP (x, 1), SImode)
8775 && !s_register_operand (XEXP (x, 0), SImode))
8776 {
8777 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8778
8779 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8780 }
8781
8782 if (flag_pic)
8783 {
8784 /* We need to find and carefully transform any SYMBOL and LABEL
8785 references; so go back to the original address expression. */
8786 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8787
8788 if (new_x != orig_x)
8789 x = new_x;
8790 }
8791
8792 return x;
8793 }
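
/* Illustrative sketch only: the offset-biasing arithmetic used in the
   -Os path above, on plain integers with hypothetical names.  It applies
   when 0 <= OFFSET < 256 + 31 * MODE_SIZE but OFFSET does not fit the
   scaled immediate field directly; the base register is biased by
   OFFSET - DELTA and DELTA is kept as the in-range immediate.  */
static inline long
thumb_bias_delta_example (long offset, long mode_size)
{
  long delta;

  if (offset >= 256)
    delta = offset - (256 - mode_size);
  else if (offset < 32 * mode_size + 8)
    delta = 31 * mode_size;
  else
    delta = offset & (~31 * mode_size);

  /* E.g. a word access (MODE_SIZE = 4) at OFFSET = 300: DELTA = 48, so
     the base is biased by 252 and the load uses immediate offset 48,
     which fits the 0..124 range of the scaled 5-bit field.  */
  return delta;
}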
8794
8795 /* Return TRUE if X contains any TLS symbol references. */
8796
8797 bool
8798 arm_tls_referenced_p (rtx x)
8799 {
8800 if (! TARGET_HAVE_TLS)
8801 return false;
8802
8803 subrtx_iterator::array_type array;
8804 FOR_EACH_SUBRTX (iter, array, x, ALL)
8805 {
8806 const_rtx x = *iter;
8807 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8808 {
8809 /* ARM currently does not provide relocations to encode TLS variables
8810 into AArch32 instructions, only data, so there is currently no way
8811 to implement these if the literal pool is disabled. */
8812 if (arm_disable_literal_pool)
8813 sorry ("accessing thread-local storage is not currently supported "
8814 "with -mpure-code or -mslow-flash-data");
8815
8816 return true;
8817 }
8818
8819 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8820 TLS offsets, not real symbol references. */
8821 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8822 iter.skip_subrtxes ();
8823 }
8824 return false;
8825 }
8826
8827 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8828
8829 On the ARM, allow any integer (invalid ones are removed later by insn
8830 patterns), nice doubles and symbol_refs which refer to the function's
8831 constant pool XXX.
8832
8833 When generating pic allow anything. */
8834
8835 static bool
8836 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8837 {
8838 return flag_pic || !label_mentioned_p (x);
8839 }
8840
8841 static bool
8842 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8843 {
8844 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8845 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8846 for ARMv8-M Baseline or later, the result is valid. */
8847 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8848 x = XEXP (x, 0);
8849
8850 return (CONST_INT_P (x)
8851 || CONST_DOUBLE_P (x)
8852 || CONSTANT_ADDRESS_P (x)
8853 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8854 || flag_pic);
8855 }
8856
8857 static bool
8858 arm_legitimate_constant_p (machine_mode mode, rtx x)
8859 {
8860 return (!arm_cannot_force_const_mem (mode, x)
8861 && (TARGET_32BIT
8862 ? arm_legitimate_constant_p_1 (mode, x)
8863 : thumb_legitimate_constant_p (mode, x)));
8864 }
8865
8866 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8867
8868 static bool
8869 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8870 {
8871 rtx base, offset;
8872
8873 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8874 {
8875 split_const (x, &base, &offset);
8876 if (GET_CODE (base) == SYMBOL_REF
8877 && !offset_within_block_p (base, INTVAL (offset)))
8878 return true;
8879 }
8880 return arm_tls_referenced_p (x);
8881 }
8882 \f
8883 #define REG_OR_SUBREG_REG(X) \
8884 (REG_P (X) \
8885 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8886
8887 #define REG_OR_SUBREG_RTX(X) \
8888 (REG_P (X) ? (X) : SUBREG_REG (X))
8889
8890 static inline int
8891 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8892 {
8893 machine_mode mode = GET_MODE (x);
8894 int total, words;
8895
8896 switch (code)
8897 {
8898 case ASHIFT:
8899 case ASHIFTRT:
8900 case LSHIFTRT:
8901 case ROTATERT:
8902 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8903
8904 case PLUS:
8905 case MINUS:
8906 case COMPARE:
8907 case NEG:
8908 case NOT:
8909 return COSTS_N_INSNS (1);
8910
8911 case MULT:
8912 if (arm_arch6m && arm_m_profile_small_mul)
8913 return COSTS_N_INSNS (32);
8914
8915 if (CONST_INT_P (XEXP (x, 1)))
8916 {
8917 int cycles = 0;
8918 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8919
8920 while (i)
8921 {
8922 i >>= 2;
8923 cycles++;
8924 }
8925 return COSTS_N_INSNS (2) + cycles;
8926 }
8927 return COSTS_N_INSNS (1) + 16;
8928
8929 case SET:
8930 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8931 the mode. */
8932 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8933 return (COSTS_N_INSNS (words)
8934 + 4 * ((MEM_P (SET_SRC (x)))
8935 + MEM_P (SET_DEST (x))));
8936
8937 case CONST_INT:
8938 if (outer == SET)
8939 {
8940 if (UINTVAL (x) < 256
8941 /* 16-bit constant. */
8942 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8943 return 0;
8944 if (thumb_shiftable_const (INTVAL (x)))
8945 return COSTS_N_INSNS (2);
8946 return COSTS_N_INSNS (3);
8947 }
8948 else if ((outer == PLUS || outer == COMPARE)
8949 && INTVAL (x) < 256 && INTVAL (x) > -256)
8950 return 0;
8951 else if ((outer == IOR || outer == XOR || outer == AND)
8952 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8953 return COSTS_N_INSNS (1);
8954 else if (outer == AND)
8955 {
8956 int i;
8957 /* This duplicates the tests in the andsi3 expander. */
8958 for (i = 9; i <= 31; i++)
8959 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8960 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8961 return COSTS_N_INSNS (2);
8962 }
8963 else if (outer == ASHIFT || outer == ASHIFTRT
8964 || outer == LSHIFTRT)
8965 return 0;
8966 return COSTS_N_INSNS (2);
8967
8968 case CONST:
8969 case CONST_DOUBLE:
8970 case LABEL_REF:
8971 case SYMBOL_REF:
8972 return COSTS_N_INSNS (3);
8973
8974 case UDIV:
8975 case UMOD:
8976 case DIV:
8977 case MOD:
8978 return 100;
8979
8980 case TRUNCATE:
8981 return 99;
8982
8983 case AND:
8984 case XOR:
8985 case IOR:
8986 /* XXX guess. */
8987 return 8;
8988
8989 case MEM:
8990 /* XXX another guess. */
8991 /* Memory costs quite a lot for the first word, but subsequent words
8992 load at the equivalent of a single insn each. */
8993 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8994 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8995 ? 4 : 0));
8996
8997 case IF_THEN_ELSE:
8998 /* XXX a guess. */
8999 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9000 return 14;
9001 return 2;
9002
9003 case SIGN_EXTEND:
9004 case ZERO_EXTEND:
9005 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9006 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9007
9008 if (mode == SImode)
9009 return total;
9010
9011 if (arm_arch6)
9012 return total + COSTS_N_INSNS (1);
9013
9014 /* Assume a two-shift sequence. Increase the cost slightly so
9015 we prefer actual shifts over an extend operation. */
9016 return total + 1 + COSTS_N_INSNS (2);
9017
9018 default:
9019 return 99;
9020 }
9021 }
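
/* Illustrative sketch only (hypothetical name): the multiply-by-constant
   cycle estimate used in the MULT case above -- roughly one cycle per two
   significant bits of the constant, modelling an early-terminating
   multiplier.  */
static inline int
thumb1_mul_const_cycles_example (unsigned long long multiplier)
{
  int cycles = 0;

  while (multiplier)
    {
      multiplier >>= 2;
      cycles++;
    }

  /* E.g. multiplier = 0x35 (six significant bits) gives 3 cycles, so the
     MULT is costed as COSTS_N_INSNS (2) + 3.  */
  return cycles;
}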
9022
9023 /* Estimates the size cost of thumb1 instructions.
9024 For now most of the code is copied from thumb1_rtx_costs. We need more
9025 fine-grained tuning when we have more related test cases. */
9026 static inline int
9027 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9028 {
9029 machine_mode mode = GET_MODE (x);
9030 int words, cost;
9031
9032 switch (code)
9033 {
9034 case ASHIFT:
9035 case ASHIFTRT:
9036 case LSHIFTRT:
9037 case ROTATERT:
9038 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9039
9040 case PLUS:
9041 case MINUS:
9042 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9043 defined by RTL expansion, especially for the expansion of
9044 multiplication. */
9045 if ((GET_CODE (XEXP (x, 0)) == MULT
9046 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9047 || (GET_CODE (XEXP (x, 1)) == MULT
9048 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9049 return COSTS_N_INSNS (2);
9050 /* Fall through. */
9051 case COMPARE:
9052 case NEG:
9053 case NOT:
9054 return COSTS_N_INSNS (1);
9055
9056 case MULT:
9057 if (CONST_INT_P (XEXP (x, 1)))
9058 {
9059 /* Thumb1 mul instruction can't operate on const. We must load it
9060 into a register first. */
9061 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9062 /* For the targets which have a very small and high-latency multiply
9063 unit, we prefer to synthesize the mult with up to 5 instructions,
9064 giving a good balance between size and performance. */
9065 if (arm_arch6m && arm_m_profile_small_mul)
9066 return COSTS_N_INSNS (5);
9067 else
9068 return COSTS_N_INSNS (1) + const_size;
9069 }
9070 return COSTS_N_INSNS (1);
9071
9072 case SET:
9073 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9074 the mode. */
9075 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9076 cost = COSTS_N_INSNS (words);
9077 if (satisfies_constraint_J (SET_SRC (x))
9078 || satisfies_constraint_K (SET_SRC (x))
9079 /* Too big an immediate for a 2-byte mov, using MOVT. */
9080 || (CONST_INT_P (SET_SRC (x))
9081 && UINTVAL (SET_SRC (x)) >= 256
9082 && TARGET_HAVE_MOVT
9083 && satisfies_constraint_j (SET_SRC (x)))
9084 /* thumb1_movdi_insn. */
9085 || ((words > 1) && MEM_P (SET_SRC (x))))
9086 cost += COSTS_N_INSNS (1);
9087 return cost;
9088
9089 case CONST_INT:
9090 if (outer == SET)
9091 {
9092 if (UINTVAL (x) < 256)
9093 return COSTS_N_INSNS (1);
9094 /* movw is 4 bytes long. */
9095 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9096 return COSTS_N_INSNS (2);
9097 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9098 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9099 return COSTS_N_INSNS (2);
9100 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9101 if (thumb_shiftable_const (INTVAL (x)))
9102 return COSTS_N_INSNS (2);
9103 return COSTS_N_INSNS (3);
9104 }
9105 else if ((outer == PLUS || outer == COMPARE)
9106 && INTVAL (x) < 256 && INTVAL (x) > -256)
9107 return 0;
9108 else if ((outer == IOR || outer == XOR || outer == AND)
9109 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9110 return COSTS_N_INSNS (1);
9111 else if (outer == AND)
9112 {
9113 int i;
9114 /* This duplicates the tests in the andsi3 expander. */
9115 for (i = 9; i <= 31; i++)
9116 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9117 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9118 return COSTS_N_INSNS (2);
9119 }
9120 else if (outer == ASHIFT || outer == ASHIFTRT
9121 || outer == LSHIFTRT)
9122 return 0;
9123 return COSTS_N_INSNS (2);
9124
9125 case CONST:
9126 case CONST_DOUBLE:
9127 case LABEL_REF:
9128 case SYMBOL_REF:
9129 return COSTS_N_INSNS (3);
9130
9131 case UDIV:
9132 case UMOD:
9133 case DIV:
9134 case MOD:
9135 return 100;
9136
9137 case TRUNCATE:
9138 return 99;
9139
9140 case AND:
9141 case XOR:
9142 case IOR:
9143 return COSTS_N_INSNS (1);
9144
9145 case MEM:
9146 return (COSTS_N_INSNS (1)
9147 + COSTS_N_INSNS (1)
9148 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9149 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9150 ? COSTS_N_INSNS (1) : 0));
9151
9152 case IF_THEN_ELSE:
9153 /* XXX a guess. */
9154 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9155 return 14;
9156 return 2;
9157
9158 case ZERO_EXTEND:
9159 /* XXX still guessing. */
9160 switch (GET_MODE (XEXP (x, 0)))
9161 {
9162 case E_QImode:
9163 return (1 + (mode == DImode ? 4 : 0)
9164 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9165
9166 case E_HImode:
9167 return (4 + (mode == DImode ? 4 : 0)
9168 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9169
9170 case E_SImode:
9171 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9172
9173 default:
9174 return 99;
9175 }
9176
9177 default:
9178 return 99;
9179 }
9180 }
9181
9182 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9183 operand, then return the operand that is being shifted. If the shift
9184 is not by a constant, then set SHIFT_REG to point to the operand.
9185 Return NULL if OP is not a shifter operand. */
9186 static rtx
9187 shifter_op_p (rtx op, rtx *shift_reg)
9188 {
9189 enum rtx_code code = GET_CODE (op);
9190
9191 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9192 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9193 return XEXP (op, 0);
9194 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9195 return XEXP (op, 0);
9196 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9197 || code == ASHIFTRT)
9198 {
9199 if (!CONST_INT_P (XEXP (op, 1)))
9200 *shift_reg = XEXP (op, 1);
9201 return XEXP (op, 0);
9202 }
9203
9204 return NULL;
9205 }
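
/* Illustrative sketch only (hypothetical name): why the MULT case above is
   treated as a shift.  A multiplication by a power of two greater than one
   is equivalent to a left shift by exact_log2 of the factor; exact_log2
   returns -1 for non-powers-of-two, and the "> 0" test above also rejects
   multiplication by 1.  */
static inline int
mult_as_shift_example (long long factor)
{
  int shift = -1;

  if (factor > 1 && (factor & (factor - 1)) == 0)
    for (shift = 0; (1LL << shift) != factor; shift++)
      ;

  /* E.g. factor = 8 gives shift = 3: x * 8 is costed like x << 3.  */
  return shift;
}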
9206
9207 static bool
9208 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9209 {
9210 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9211 rtx_code code = GET_CODE (x);
9212 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9213
9214 switch (XINT (x, 1))
9215 {
9216 case UNSPEC_UNALIGNED_LOAD:
9217 /* We can only do unaligned loads into the integer unit, and we can't
9218 use LDM or LDRD. */
9219 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9220 if (speed_p)
9221 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9222 + extra_cost->ldst.load_unaligned);
9223
9224 #ifdef NOT_YET
9225 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9226 ADDR_SPACE_GENERIC, speed_p);
9227 #endif
9228 return true;
9229
9230 case UNSPEC_UNALIGNED_STORE:
9231 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9232 if (speed_p)
9233 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9234 + extra_cost->ldst.store_unaligned);
9235
9236 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9237 #ifdef NOT_YET
9238 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9239 ADDR_SPACE_GENERIC, speed_p);
9240 #endif
9241 return true;
9242
9243 case UNSPEC_VRINTZ:
9244 case UNSPEC_VRINTP:
9245 case UNSPEC_VRINTM:
9246 case UNSPEC_VRINTR:
9247 case UNSPEC_VRINTX:
9248 case UNSPEC_VRINTA:
9249 if (speed_p)
9250 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9251
9252 return true;
9253 default:
9254 *cost = COSTS_N_INSNS (2);
9255 break;
9256 }
9257 return true;
9258 }
9259
9260 /* Cost of a libcall. We assume one insn per argument, an amount for the
9261 call (one insn for -Os) and then one for processing the result. */
9262 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9263
9264 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9265 do \
9266 { \
9267 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9268 if (shift_op != NULL \
9269 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9270 { \
9271 if (shift_reg) \
9272 { \
9273 if (speed_p) \
9274 *cost += extra_cost->alu.arith_shift_reg; \
9275 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9276 ASHIFT, 1, speed_p); \
9277 } \
9278 else if (speed_p) \
9279 *cost += extra_cost->alu.arith_shift; \
9280 \
9281 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9282 ASHIFT, 0, speed_p) \
9283 + rtx_cost (XEXP (x, 1 - IDX), \
9284 GET_MODE (shift_op), \
9285 OP, 1, speed_p)); \
9286 return true; \
9287 } \
9288 } \
9289 while (0)
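
/* Illustrative sketch only: LIBCALL_COST written out as a function, using
   the same speed_p convention as the callers below.  For a two-argument
   libcall this evaluates to COSTS_N_INSNS (20) when optimizing for speed
   and COSTS_N_INSNS (4) when optimizing for size.  */
static inline int
libcall_cost_example (int nargs, bool speed_p)
{
  /* One insn per argument, plus 18 (speed) or 2 (size) for making the
     call and processing the result.  */
  return COSTS_N_INSNS (nargs + (speed_p ? 18 : 2));
}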
9290
9291 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9292 considering the costs of the addressing mode and memory access
9293 separately. */
9294 static bool
9295 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9296 int *cost, bool speed_p)
9297 {
9298 machine_mode mode = GET_MODE (x);
9299
9300 *cost = COSTS_N_INSNS (1);
9301
9302 if (flag_pic
9303 && GET_CODE (XEXP (x, 0)) == PLUS
9304 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9305 /* This will be split into two instructions. Add the cost of the
9306 additional instruction here. The cost of the memory access is computed
9307 below. See arm.md:calculate_pic_address. */
9308 *cost += COSTS_N_INSNS (1);
9309
9310 /* Calculate cost of the addressing mode. */
9311 if (speed_p)
9312 {
9313 arm_addr_mode_op op_type;
9314 switch (GET_CODE (XEXP (x, 0)))
9315 {
9316 default:
9317 case REG:
9318 op_type = AMO_DEFAULT;
9319 break;
9320 case MINUS:
9321 /* MINUS does not appear in RTL, but the architecture supports it,
9322 so handle this case defensively. */
9323 /* fall through */
9324 case PLUS:
9325 op_type = AMO_NO_WB;
9326 break;
9327 case PRE_INC:
9328 case PRE_DEC:
9329 case POST_INC:
9330 case POST_DEC:
9331 case PRE_MODIFY:
9332 case POST_MODIFY:
9333 op_type = AMO_WB;
9334 break;
9335 }
9336
9337 if (VECTOR_MODE_P (mode))
9338 *cost += current_tune->addr_mode_costs->vector[op_type];
9339 else if (FLOAT_MODE_P (mode))
9340 *cost += current_tune->addr_mode_costs->fp[op_type];
9341 else
9342 *cost += current_tune->addr_mode_costs->integer[op_type];
9343 }
9344
9345 /* Calculate cost of memory access. */
9346 if (speed_p)
9347 {
9348 if (FLOAT_MODE_P (mode))
9349 {
9350 if (GET_MODE_SIZE (mode) == 8)
9351 *cost += extra_cost->ldst.loadd;
9352 else
9353 *cost += extra_cost->ldst.loadf;
9354 }
9355 else if (VECTOR_MODE_P (mode))
9356 *cost += extra_cost->ldst.loadv;
9357 else
9358 {
9359 /* Integer modes */
9360 if (GET_MODE_SIZE (mode) == 8)
9361 *cost += extra_cost->ldst.ldrd;
9362 else
9363 *cost += extra_cost->ldst.load;
9364 }
9365 }
9366
9367 return true;
9368 }
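
/* Illustrative sketch only (hypothetical name): how the pieces above
   combine.  The total cost of a MEM is one baseline insn, plus one more
   insn when the PIC address will be split, plus -- for speed costing
   only -- an addressing-mode entry selected by the shape of the address
   and a load entry selected by the mode being accessed.  */
static inline int
mem_cost_shape_example (bool speed_p, bool pic_split_p,
			int addr_mode_cost, int access_cost)
{
  int cost = COSTS_N_INSNS (1);

  if (pic_split_p)
    cost += COSTS_N_INSNS (1);

  if (speed_p)
    cost += addr_mode_cost + access_cost;

  return cost;
}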
9369
9370 /* RTX costs. Make an estimate of the cost of executing the operation
9371 X, which is contained within an operation with code OUTER_CODE.
9372 SPEED_P indicates whether the cost desired is the performance cost,
9373 or the size cost. The estimate is stored in COST and the return
9374 value is TRUE if the cost calculation is final, or FALSE if the
9375 caller should recurse through the operands of X to add additional
9376 costs.
9377
9378 We currently make no attempt to model the size savings of Thumb-2
9379 16-bit instructions. At the normal points in compilation where
9380 this code is called we have no measure of whether the condition
9381 flags are live or not, and thus no realistic way to determine what
9382 the size will eventually be. */
9383 static bool
9384 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9385 const struct cpu_cost_table *extra_cost,
9386 int *cost, bool speed_p)
9387 {
9388 machine_mode mode = GET_MODE (x);
9389
9390 *cost = COSTS_N_INSNS (1);
9391
9392 if (TARGET_THUMB1)
9393 {
9394 if (speed_p)
9395 *cost = thumb1_rtx_costs (x, code, outer_code);
9396 else
9397 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9398 return true;
9399 }
9400
9401 switch (code)
9402 {
9403 case SET:
9404 *cost = 0;
9405 /* SET RTXs don't have a mode so we get it from the destination. */
9406 mode = GET_MODE (SET_DEST (x));
9407
9408 if (REG_P (SET_SRC (x))
9409 && REG_P (SET_DEST (x)))
9410 {
9411 /* Assume that most copies can be done with a single insn,
9412 unless we don't have HW FP, in which case everything
9413 larger than word mode will require two insns. */
9414 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9415 && GET_MODE_SIZE (mode) > 4)
9416 || mode == DImode)
9417 ? 2 : 1);
9418 /* Conditional register moves can be encoded
9419 in 16 bits in Thumb mode. */
9420 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9421 *cost >>= 1;
9422
9423 return true;
9424 }
9425
9426 if (CONST_INT_P (SET_SRC (x)))
9427 {
9428 /* Handle CONST_INT here, since the value doesn't have a mode
9429 and we would otherwise be unable to work out the true cost. */
9430 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9431 0, speed_p);
9432 outer_code = SET;
9433 /* Slightly lower the cost of setting a core reg to a constant.
9434 This helps break up chains and allows for better scheduling. */
9435 if (REG_P (SET_DEST (x))
9436 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9437 *cost -= 1;
9438 x = SET_SRC (x);
9439 /* Immediate moves with an immediate in the range [0, 255] can be
9440 encoded in 16 bits in Thumb mode. */
9441 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9442 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9443 *cost >>= 1;
9444 goto const_int_cost;
9445 }
9446
9447 return false;
9448
9449 case MEM:
9450 return arm_mem_costs (x, extra_cost, cost, speed_p);
9451
9452 case PARALLEL:
9453 {
9454 /* Calculations of LDM costs are complex. We assume an initial cost
9455 (ldm_1st) which will load the number of registers mentioned in
9456 ldm_regs_per_insn_1st registers; then each additional
9457 ldm_regs_per_insn_subsequent registers cost one more insn. The
9458 formula for N regs is thus:
9459
9460 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9461 + ldm_regs_per_insn_subsequent - 1)
9462 / ldm_regs_per_insn_subsequent).
9463
9464 Additional costs may also be added for addressing. A similar
9465 formula is used for STM. */
9466
9467 bool is_ldm = load_multiple_operation (x, SImode);
9468 bool is_stm = store_multiple_operation (x, SImode);
9469
9470 if (is_ldm || is_stm)
9471 {
9472 if (speed_p)
9473 {
9474 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9475 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9476 ? extra_cost->ldst.ldm_regs_per_insn_1st
9477 : extra_cost->ldst.stm_regs_per_insn_1st;
9478 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9479 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9480 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9481
9482 *cost += regs_per_insn_1st
9483 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9484 + regs_per_insn_sub - 1)
9485 / regs_per_insn_sub);
9486 return true;
9487 }
9488
9489 }
9490 return false;
9491 }
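      /* A worked instance of the formula above, with hypothetical tuning
	 numbers: for ldm_regs_per_insn_1st = 2 and
	 ldm_regs_per_insn_subsequent = 2, an LDM of N = 7 registers costs
	 ldm_1st + COSTS_N_INSNS ((MAX (7 - 2, 0) + 2 - 1) / 2)
	 = ldm_1st + COSTS_N_INSNS (3).  */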
9492 case DIV:
9493 case UDIV:
9494 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9495 && (mode == SFmode || !TARGET_VFP_SINGLE))
9496 *cost += COSTS_N_INSNS (speed_p
9497 ? extra_cost->fp[mode != SFmode].div : 0);
9498 else if (mode == SImode && TARGET_IDIV)
9499 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9500 else
9501 *cost = LIBCALL_COST (2);
9502
9503 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9504 possible, udiv is preferred. */
9505 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9506 return false; /* All arguments must be in registers. */
9507
9508 case MOD:
9509 /* MOD by a power of 2 can be expanded as:
9510 rsbs r1, r0, #0
9511 and r0, r0, #(n - 1)
9512 and r1, r1, #(n - 1)
9513 rsbpl r0, r1, #0. */
9514 if (CONST_INT_P (XEXP (x, 1))
9515 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9516 && mode == SImode)
9517 {
9518 *cost += COSTS_N_INSNS (3);
9519
9520 if (speed_p)
9521 *cost += 2 * extra_cost->alu.logical
9522 + extra_cost->alu.arith;
9523 return true;
9524 }
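      /* A worked instance of the expansion above, for x % 8 with r0 = -5:
	   rsbs  r1, r0, #0	@ r1 = 5, flags from 0 - (-5): positive (PL)
	   and   r0, r0, #7	@ r0 = 3
	   and   r1, r1, #7	@ r1 = 5
	   rsbpl r0, r1, #0	@ executed (PL), r0 = -5
	 giving -5 % 8 == -5, as required by C's truncated division; for
	 r0 = 5 the first RSBS sets the N flag, the RSBPL is skipped and
	 r0 stays 5.  */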
9525
9526 /* Fall-through. */
9527 case UMOD:
9528 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9529 possible, udiv is preferred. */
9530 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9531 return false; /* All arguments must be in registers. */
9532
9533 case ROTATE:
9534 if (mode == SImode && REG_P (XEXP (x, 1)))
9535 {
9536 *cost += (COSTS_N_INSNS (1)
9537 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9538 if (speed_p)
9539 *cost += extra_cost->alu.shift_reg;
9540 return true;
9541 }
9542 /* Fall through */
9543 case ROTATERT:
9544 case ASHIFT:
9545 case LSHIFTRT:
9546 case ASHIFTRT:
9547 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9548 {
9549 *cost += (COSTS_N_INSNS (2)
9550 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9551 if (speed_p)
9552 *cost += 2 * extra_cost->alu.shift;
9553 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9554 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9555 *cost += 1;
9556 return true;
9557 }
9558 else if (mode == SImode)
9559 {
9560 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9561 /* Slightly disparage register shifts at -Os, but not by much. */
9562 if (!CONST_INT_P (XEXP (x, 1)))
9563 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9564 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9565 return true;
9566 }
9567 else if (GET_MODE_CLASS (mode) == MODE_INT
9568 && GET_MODE_SIZE (mode) < 4)
9569 {
9570 if (code == ASHIFT)
9571 {
9572 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9573 /* Slightly disparage register shifts at -Os, but not by
9574 much. */
9575 if (!CONST_INT_P (XEXP (x, 1)))
9576 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9577 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9578 }
9579 else if (code == LSHIFTRT || code == ASHIFTRT)
9580 {
9581 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9582 {
9583 /* Can use SBFX/UBFX. */
9584 if (speed_p)
9585 *cost += extra_cost->alu.bfx;
9586 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9587 }
9588 else
9589 {
9590 *cost += COSTS_N_INSNS (1);
9591 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9592 if (speed_p)
9593 {
9594 if (CONST_INT_P (XEXP (x, 1)))
9595 *cost += 2 * extra_cost->alu.shift;
9596 else
9597 *cost += (extra_cost->alu.shift
9598 + extra_cost->alu.shift_reg);
9599 }
9600 else
9601 /* Slightly disparage register shifts. */
9602 *cost += !CONST_INT_P (XEXP (x, 1));
9603 }
9604 }
9605 else /* Rotates. */
9606 {
9607 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9608 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9609 if (speed_p)
9610 {
9611 if (CONST_INT_P (XEXP (x, 1)))
9612 *cost += (2 * extra_cost->alu.shift
9613 + extra_cost->alu.log_shift);
9614 else
9615 *cost += (extra_cost->alu.shift
9616 + extra_cost->alu.shift_reg
9617 + extra_cost->alu.log_shift_reg);
9618 }
9619 }
9620 return true;
9621 }
9622
9623 *cost = LIBCALL_COST (2);
9624 return false;
9625
9626 case BSWAP:
9627 if (arm_arch6)
9628 {
9629 if (mode == SImode)
9630 {
9631 if (speed_p)
9632 *cost += extra_cost->alu.rev;
9633
9634 return false;
9635 }
9636 }
9637 else
9638 {
9639 /* No rev instruction available. Look at arm_legacy_rev
9640 and thumb_legacy_rev for the form of RTL used then. */
9641 if (TARGET_THUMB)
9642 {
9643 *cost += COSTS_N_INSNS (9);
9644
9645 if (speed_p)
9646 {
9647 *cost += 6 * extra_cost->alu.shift;
9648 *cost += 3 * extra_cost->alu.logical;
9649 }
9650 }
9651 else
9652 {
9653 *cost += COSTS_N_INSNS (4);
9654
9655 if (speed_p)
9656 {
9657 *cost += 2 * extra_cost->alu.shift;
9658 *cost += extra_cost->alu.arith_shift;
9659 *cost += 2 * extra_cost->alu.logical;
9660 }
9661 }
9662 return true;
9663 }
9664 return false;
9665
9666 case MINUS:
9667 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9668 && (mode == SFmode || !TARGET_VFP_SINGLE))
9669 {
9670 if (GET_CODE (XEXP (x, 0)) == MULT
9671 || GET_CODE (XEXP (x, 1)) == MULT)
9672 {
9673 rtx mul_op0, mul_op1, sub_op;
9674
9675 if (speed_p)
9676 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9677
9678 if (GET_CODE (XEXP (x, 0)) == MULT)
9679 {
9680 mul_op0 = XEXP (XEXP (x, 0), 0);
9681 mul_op1 = XEXP (XEXP (x, 0), 1);
9682 sub_op = XEXP (x, 1);
9683 }
9684 else
9685 {
9686 mul_op0 = XEXP (XEXP (x, 1), 0);
9687 mul_op1 = XEXP (XEXP (x, 1), 1);
9688 sub_op = XEXP (x, 0);
9689 }
9690
9691 /* The first operand of the multiply may be optionally
9692 negated. */
9693 if (GET_CODE (mul_op0) == NEG)
9694 mul_op0 = XEXP (mul_op0, 0);
9695
9696 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9697 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9698 + rtx_cost (sub_op, mode, code, 0, speed_p));
9699
9700 return true;
9701 }
9702
9703 if (speed_p)
9704 *cost += extra_cost->fp[mode != SFmode].addsub;
9705 return false;
9706 }
9707
9708 if (mode == SImode)
9709 {
9710 rtx shift_by_reg = NULL;
9711 rtx shift_op;
9712 rtx non_shift_op;
9713
9714 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9715 if (shift_op == NULL)
9716 {
9717 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9718 non_shift_op = XEXP (x, 0);
9719 }
9720 else
9721 non_shift_op = XEXP (x, 1);
9722
9723 if (shift_op != NULL)
9724 {
9725 if (shift_by_reg != NULL)
9726 {
9727 if (speed_p)
9728 *cost += extra_cost->alu.arith_shift_reg;
9729 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9730 }
9731 else if (speed_p)
9732 *cost += extra_cost->alu.arith_shift;
9733
9734 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9735 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9736 return true;
9737 }
9738
9739 if (arm_arch_thumb2
9740 && GET_CODE (XEXP (x, 1)) == MULT)
9741 {
9742 /* MLS. */
9743 if (speed_p)
9744 *cost += extra_cost->mult[0].add;
9745 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9746 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9747 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9748 return true;
9749 }
9750
9751 if (CONST_INT_P (XEXP (x, 0)))
9752 {
9753 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9754 INTVAL (XEXP (x, 0)), NULL_RTX,
9755 NULL_RTX, 1, 0);
9756 *cost = COSTS_N_INSNS (insns);
9757 if (speed_p)
9758 *cost += insns * extra_cost->alu.arith;
9759 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9760 return true;
9761 }
9762 else if (speed_p)
9763 *cost += extra_cost->alu.arith;
9764
9765 return false;
9766 }
9767
9768 if (GET_MODE_CLASS (mode) == MODE_INT
9769 && GET_MODE_SIZE (mode) < 4)
9770 {
9771 rtx shift_op, shift_reg;
9772 shift_reg = NULL;
9773
9774 /* We check both sides of the MINUS for shifter operands since,
9775 unlike PLUS, it's not commutative. */
9776
9777 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9778 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9779
9780 /* Slightly disparage, as we might need to widen the result. */
9781 *cost += 1;
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9784
9785 if (CONST_INT_P (XEXP (x, 0)))
9786 {
9787 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9788 return true;
9789 }
9790
9791 return false;
9792 }
9793
9794 if (mode == DImode)
9795 {
9796 *cost += COSTS_N_INSNS (1);
9797
9798 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9799 {
9800 rtx op1 = XEXP (x, 1);
9801
9802 if (speed_p)
9803 *cost += 2 * extra_cost->alu.arith;
9804
9805 if (GET_CODE (op1) == ZERO_EXTEND)
9806 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9807 0, speed_p);
9808 else
9809 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9810 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9811 0, speed_p);
9812 return true;
9813 }
9814 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9815 {
9816 if (speed_p)
9817 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9818 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9819 0, speed_p)
9820 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9821 return true;
9822 }
9823 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9824 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9825 {
9826 if (speed_p)
9827 *cost += (extra_cost->alu.arith
9828 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9829 ? extra_cost->alu.arith
9830 : extra_cost->alu.arith_shift));
9831 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9832 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9833 GET_CODE (XEXP (x, 1)), 0, speed_p));
9834 return true;
9835 }
9836
9837 if (speed_p)
9838 *cost += 2 * extra_cost->alu.arith;
9839 return false;
9840 }
9841
9842 /* Vector mode? */
9843
9844 *cost = LIBCALL_COST (2);
9845 return false;
9846
9847 case PLUS:
9848 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9849 && (mode == SFmode || !TARGET_VFP_SINGLE))
9850 {
9851 if (GET_CODE (XEXP (x, 0)) == MULT)
9852 {
9853 rtx mul_op0, mul_op1, add_op;
9854
9855 if (speed_p)
9856 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9857
9858 mul_op0 = XEXP (XEXP (x, 0), 0);
9859 mul_op1 = XEXP (XEXP (x, 0), 1);
9860 add_op = XEXP (x, 1);
9861
9862 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9863 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9864 + rtx_cost (add_op, mode, code, 0, speed_p));
9865
9866 return true;
9867 }
9868
9869 if (speed_p)
9870 *cost += extra_cost->fp[mode != SFmode].addsub;
9871 return false;
9872 }
9873 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9874 {
9875 *cost = LIBCALL_COST (2);
9876 return false;
9877 }
9878
9879 /* Narrow modes can be synthesized in SImode, but the range
9880 of useful sub-operations is limited. Check for shift operations
9881 on one of the operands. Only left shifts can be used in the
9882 narrow modes. */
9883 if (GET_MODE_CLASS (mode) == MODE_INT
9884 && GET_MODE_SIZE (mode) < 4)
9885 {
9886 rtx shift_op, shift_reg;
9887 shift_reg = NULL;
9888
9889 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9890
9891 if (CONST_INT_P (XEXP (x, 1)))
9892 {
9893 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9894 INTVAL (XEXP (x, 1)), NULL_RTX,
9895 NULL_RTX, 1, 0);
9896 *cost = COSTS_N_INSNS (insns);
9897 if (speed_p)
9898 *cost += insns * extra_cost->alu.arith;
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9902 return true;
9903 }
9904
9905 /* Slightly penalize a narrow operation as the result may
9906 need widening. */
9907 *cost += 1;
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith;
9910
9911 return false;
9912 }
9913
9914 if (mode == SImode)
9915 {
9916 rtx shift_op, shift_reg;
9917
9918 if (TARGET_INT_SIMD
9919 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9920 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9921 {
9922 /* UXTA[BH] or SXTA[BH]. */
9923 if (speed_p)
9924 *cost += extra_cost->alu.extend_arith;
9925 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9926 0, speed_p)
9927 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9928 return true;
9929 }
9930
9931 shift_reg = NULL;
9932 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9933 if (shift_op != NULL)
9934 {
9935 if (shift_reg)
9936 {
9937 if (speed_p)
9938 *cost += extra_cost->alu.arith_shift_reg;
9939 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9940 }
9941 else if (speed_p)
9942 *cost += extra_cost->alu.arith_shift;
9943
9944 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9945 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9946 return true;
9947 }
9948 if (GET_CODE (XEXP (x, 0)) == MULT)
9949 {
9950 rtx mul_op = XEXP (x, 0);
9951
9952 if (TARGET_DSP_MULTIPLY
9953 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9954 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9955 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9956 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9957 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9958 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9961 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9962 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9963 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9964 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9965 == 16))))))
9966 {
9967 /* SMLA[BT][BT]. */
9968 if (speed_p)
9969 *cost += extra_cost->mult[0].extend_add;
9970 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9971 SIGN_EXTEND, 0, speed_p)
9972 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9973 SIGN_EXTEND, 0, speed_p)
9974 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9975 return true;
9976 }
9977
9978 if (speed_p)
9979 *cost += extra_cost->mult[0].add;
9980 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9981 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9982 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9983 return true;
9984 }
9985 if (CONST_INT_P (XEXP (x, 1)))
9986 {
9987 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9988 INTVAL (XEXP (x, 1)), NULL_RTX,
9989 NULL_RTX, 1, 0);
9990 *cost = COSTS_N_INSNS (insns);
9991 if (speed_p)
9992 *cost += insns * extra_cost->alu.arith;
9993 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9994 return true;
9995 }
9996 else if (speed_p)
9997 *cost += extra_cost->alu.arith;
9998
9999 return false;
10000 }
10001
10002 if (mode == DImode)
10003 {
10004 if (GET_CODE (XEXP (x, 0)) == MULT
10005 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10006 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10007 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10008 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10009 {
10010 if (speed_p)
10011 *cost += extra_cost->mult[1].extend_add;
10012 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10013 ZERO_EXTEND, 0, speed_p)
10014 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10015 ZERO_EXTEND, 0, speed_p)
10016 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10017 return true;
10018 }
10019
10020 *cost += COSTS_N_INSNS (1);
10021
10022 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10023 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10024 {
10025 if (speed_p)
10026 *cost += (extra_cost->alu.arith
10027 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10028 ? extra_cost->alu.arith
10029 : extra_cost->alu.arith_shift));
10030
10031 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10032 0, speed_p)
10033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10034 return true;
10035 }
10036
10037 if (speed_p)
10038 *cost += 2 * extra_cost->alu.arith;
10039 return false;
10040 }
10041
10042 /* Vector mode? */
10043 *cost = LIBCALL_COST (2);
10044 return false;
10045 case IOR:
10046 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10047 {
10048 if (speed_p)
10049 *cost += extra_cost->alu.rev;
10050
10051 return true;
10052 }
10053 /* Fall through. */
10054 case AND: case XOR:
10055 if (mode == SImode)
10056 {
10057 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10058 rtx op0 = XEXP (x, 0);
10059 rtx shift_op, shift_reg;
10060
10061 if (subcode == NOT
10062 && (code == AND
10063 || (code == IOR && TARGET_THUMB2)))
10064 op0 = XEXP (op0, 0);
10065
10066 shift_reg = NULL;
10067 shift_op = shifter_op_p (op0, &shift_reg);
10068 if (shift_op != NULL)
10069 {
10070 if (shift_reg)
10071 {
10072 if (speed_p)
10073 *cost += extra_cost->alu.log_shift_reg;
10074 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10075 }
10076 else if (speed_p)
10077 *cost += extra_cost->alu.log_shift;
10078
10079 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10080 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10081 return true;
10082 }
10083
10084 if (CONST_INT_P (XEXP (x, 1)))
10085 {
10086 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10087 INTVAL (XEXP (x, 1)), NULL_RTX,
10088 NULL_RTX, 1, 0);
10089
10090 *cost = COSTS_N_INSNS (insns);
10091 if (speed_p)
10092 *cost += insns * extra_cost->alu.logical;
10093 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10094 return true;
10095 }
10096
10097 if (speed_p)
10098 *cost += extra_cost->alu.logical;
10099 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10100 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10101 return true;
10102 }
10103
10104 if (mode == DImode)
10105 {
10106 rtx op0 = XEXP (x, 0);
10107 enum rtx_code subcode = GET_CODE (op0);
10108
10109 *cost += COSTS_N_INSNS (1);
10110
10111 if (subcode == NOT
10112 && (code == AND
10113 || (code == IOR && TARGET_THUMB2)))
10114 op0 = XEXP (op0, 0);
10115
10116 if (GET_CODE (op0) == ZERO_EXTEND)
10117 {
10118 if (speed_p)
10119 *cost += 2 * extra_cost->alu.logical;
10120
10121 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10122 0, speed_p)
10123 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10124 return true;
10125 }
10126 else if (GET_CODE (op0) == SIGN_EXTEND)
10127 {
10128 if (speed_p)
10129 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10130
10131 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10132 0, speed_p)
10133 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10134 return true;
10135 }
10136
10137 if (speed_p)
10138 *cost += 2 * extra_cost->alu.logical;
10139
10140 return true;
10141 }
10142 /* Vector mode? */
10143
10144 *cost = LIBCALL_COST (2);
10145 return false;
10146
10147 case MULT:
10148 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10149 && (mode == SFmode || !TARGET_VFP_SINGLE))
10150 {
10151 rtx op0 = XEXP (x, 0);
10152
10153 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10154 op0 = XEXP (op0, 0);
10155
10156 if (speed_p)
10157 *cost += extra_cost->fp[mode != SFmode].mult;
10158
10159 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10160 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10161 return true;
10162 }
10163 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10164 {
10165 *cost = LIBCALL_COST (2);
10166 return false;
10167 }
10168
10169 if (mode == SImode)
10170 {
10171 if (TARGET_DSP_MULTIPLY
10172 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10173 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10174 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10175 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10176 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10177 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10178 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10179 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10180 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10181 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10183 && (INTVAL (XEXP (XEXP (x, 1), 1))
10184 == 16))))))
10185 {
10186 /* SMUL[TB][TB]. */
10187 if (speed_p)
10188 *cost += extra_cost->mult[0].extend;
10189 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10190 SIGN_EXTEND, 0, speed_p);
10191 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10192 SIGN_EXTEND, 1, speed_p);
10193 return true;
10194 }
10195 if (speed_p)
10196 *cost += extra_cost->mult[0].simple;
10197 return false;
10198 }
10199
10200 if (mode == DImode)
10201 {
10202 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10203 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10204 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10205 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10206 {
10207 if (speed_p)
10208 *cost += extra_cost->mult[1].extend;
10209 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10210 ZERO_EXTEND, 0, speed_p)
10211 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10212 ZERO_EXTEND, 0, speed_p));
10213 return true;
10214 }
10215
10216 *cost = LIBCALL_COST (2);
10217 return false;
10218 }
10219
10220 /* Vector mode? */
10221 *cost = LIBCALL_COST (2);
10222 return false;
10223
10224 case NEG:
10225 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10226 && (mode == SFmode || !TARGET_VFP_SINGLE))
10227 {
10228 if (GET_CODE (XEXP (x, 0)) == MULT)
10229 {
10230 /* VNMUL. */
10231 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10232 return true;
10233 }
10234
10235 if (speed_p)
10236 *cost += extra_cost->fp[mode != SFmode].neg;
10237
10238 return false;
10239 }
10240 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10241 {
10242 *cost = LIBCALL_COST (1);
10243 return false;
10244 }
10245
10246 if (mode == SImode)
10247 {
10248 if (GET_CODE (XEXP (x, 0)) == ABS)
10249 {
10250 *cost += COSTS_N_INSNS (1);
10251 /* Assume the non-flag-changing variant. */
10252 if (speed_p)
10253 *cost += (extra_cost->alu.log_shift
10254 + extra_cost->alu.arith_shift);
10255 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10256 return true;
10257 }
10258
10259 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10260 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10261 {
10262 *cost += COSTS_N_INSNS (1);
10263 /* No extra cost for MOV imm and MVN imm. */
10264 /* If the comparison op is using the flags, there's no further
10265 cost; otherwise we need to add the cost of the comparison. */
10266 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10267 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10268 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10269 {
10270 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10271 *cost += (COSTS_N_INSNS (1)
10272 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10273 0, speed_p)
10274 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10275 1, speed_p));
10276 if (speed_p)
10277 *cost += extra_cost->alu.arith;
10278 }
10279 return true;
10280 }
10281
10282 if (speed_p)
10283 *cost += extra_cost->alu.arith;
10284 return false;
10285 }
10286
10287 if (GET_MODE_CLASS (mode) == MODE_INT
10288 && GET_MODE_SIZE (mode) < 4)
10289 {
10290 /* Slightly disparage, as we might need an extend operation. */
10291 *cost += 1;
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 return false;
10295 }
10296
10297 if (mode == DImode)
10298 {
10299 *cost += COSTS_N_INSNS (1);
10300 if (speed_p)
10301 *cost += 2 * extra_cost->alu.arith;
10302 return false;
10303 }
10304
10305 /* Vector mode? */
10306 *cost = LIBCALL_COST (1);
10307 return false;
10308
10309 case NOT:
10310 if (mode == SImode)
10311 {
10312 rtx shift_op;
10313 rtx shift_reg = NULL;
10314
10315 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10316
10317 if (shift_op)
10318 {
10319 if (shift_reg != NULL)
10320 {
10321 if (speed_p)
10322 *cost += extra_cost->alu.log_shift_reg;
10323 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10324 }
10325 else if (speed_p)
10326 *cost += extra_cost->alu.log_shift;
10327 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10328 return true;
10329 }
10330
10331 if (speed_p)
10332 *cost += extra_cost->alu.logical;
10333 return false;
10334 }
10335 if (mode == DImode)
10336 {
10337 *cost += COSTS_N_INSNS (1);
10338 return false;
10339 }
10340
10341 /* Vector mode? */
10342
10343 *cost += LIBCALL_COST (1);
10344 return false;
10345
10346 case IF_THEN_ELSE:
10347 {
10348 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10349 {
10350 *cost += COSTS_N_INSNS (3);
10351 return true;
10352 }
10353 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10354 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10355
10356 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10357 /* Assume that if one arm of the if_then_else is a register,
10358 that it will be tied with the result and eliminate the
10359 conditional insn. */
10360 if (REG_P (XEXP (x, 1)))
10361 *cost += op2cost;
10362 else if (REG_P (XEXP (x, 2)))
10363 *cost += op1cost;
10364 else
10365 {
10366 if (speed_p)
10367 {
10368 if (extra_cost->alu.non_exec_costs_exec)
10369 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10370 else
10371 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10372 }
10373 else
10374 *cost += op1cost + op2cost;
10375 }
10376 }
10377 return true;
10378
10379 case COMPARE:
10380 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10381 *cost = 0;
10382 else
10383 {
10384 machine_mode op0mode;
10385 /* We'll mostly assume that the cost of a compare is the cost of the
10386 LHS. However, there are some notable exceptions. */
10387
10388 /* Floating point compares are never done as side-effects. */
10389 op0mode = GET_MODE (XEXP (x, 0));
10390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10391 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10392 {
10393 if (speed_p)
10394 *cost += extra_cost->fp[op0mode != SFmode].compare;
10395
10396 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10397 {
10398 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10399 return true;
10400 }
10401
10402 return false;
10403 }
10404 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10405 {
10406 *cost = LIBCALL_COST (2);
10407 return false;
10408 }
10409
10410 /* DImode compares normally take two insns. */
10411 if (op0mode == DImode)
10412 {
10413 *cost += COSTS_N_INSNS (1);
10414 if (speed_p)
10415 *cost += 2 * extra_cost->alu.arith;
10416 return false;
10417 }
10418
10419 if (op0mode == SImode)
10420 {
10421 rtx shift_op;
10422 rtx shift_reg;
10423
10424 if (XEXP (x, 1) == const0_rtx
10425 && !(REG_P (XEXP (x, 0))
10426 || (GET_CODE (XEXP (x, 0)) == SUBREG
10427 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10428 {
10429 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10430
10431 /* Multiply operations that set the flags are often
10432 significantly more expensive. */
10433 if (speed_p
10434 && GET_CODE (XEXP (x, 0)) == MULT
10435 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10436 *cost += extra_cost->mult[0].flag_setting;
10437
10438 if (speed_p
10439 && GET_CODE (XEXP (x, 0)) == PLUS
10440 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10441 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10442 0), 1), mode))
10443 *cost += extra_cost->mult[0].flag_setting;
10444 return true;
10445 }
10446
10447 shift_reg = NULL;
10448 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10449 if (shift_op != NULL)
10450 {
10451 if (shift_reg != NULL)
10452 {
10453 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10454 1, speed_p);
10455 if (speed_p)
10456 *cost += extra_cost->alu.arith_shift_reg;
10457 }
10458 else if (speed_p)
10459 *cost += extra_cost->alu.arith_shift;
10460 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10461 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10462 return true;
10463 }
10464
10465 if (speed_p)
10466 *cost += extra_cost->alu.arith;
10467 if (CONST_INT_P (XEXP (x, 1))
10468 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10469 {
10470 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10471 return true;
10472 }
10473 return false;
10474 }
10475
10476 /* Vector mode? */
10477
10478 *cost = LIBCALL_COST (2);
10479 return false;
10480 }
10481 return true;
10482
10483 case EQ:
10484 case NE:
10485 case LT:
10486 case LE:
10487 case GT:
10488 case GE:
10489 case LTU:
10490 case LEU:
10491 case GEU:
10492 case GTU:
10493 case ORDERED:
10494 case UNORDERED:
10495 case UNEQ:
10496 case UNLE:
10497 case UNLT:
10498 case UNGE:
10499 case UNGT:
10500 case LTGT:
10501 if (outer_code == SET)
10502 {
10503 /* Is it a store-flag operation? */
10504 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10505 && XEXP (x, 1) == const0_rtx)
10506 {
10507 /* Thumb also needs an IT insn. */
10508 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10509 return true;
10510 }
10511 if (XEXP (x, 1) == const0_rtx)
10512 {
10513 switch (code)
10514 {
10515 case LT:
10516 /* LSR Rd, Rn, #31. */
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 break;
10520
10521 case EQ:
10522 /* RSBS T1, Rn, #0
10523 ADC Rd, Rn, T1. */
10524
10525 case NE:
10526 /* SUBS T1, Rn, #1
10527 SBC Rd, Rn, T1. */
10528 *cost += COSTS_N_INSNS (1);
10529 break;
10530
10531 case LE:
10532 /* RSBS T1, Rn, Rn, LSR #31
10533 ADC Rd, Rn, T1. */
10534 *cost += COSTS_N_INSNS (1);
10535 if (speed_p)
10536 *cost += extra_cost->alu.arith_shift;
10537 break;
10538
10539 case GT:
10540 /* RSB Rd, Rn, Rn, ASR #1
10541 LSR Rd, Rd, #31. */
10542 *cost += COSTS_N_INSNS (1);
10543 if (speed_p)
10544 *cost += (extra_cost->alu.arith_shift
10545 + extra_cost->alu.shift);
10546 break;
10547
10548 case GE:
10549 /* ASR Rd, Rn, #31
10550 ADD Rd, Rn, #1. */
10551 *cost += COSTS_N_INSNS (1);
10552 if (speed_p)
10553 *cost += extra_cost->alu.shift;
10554 break;
10555
10556 default:
10557 /* Remaining cases are either meaningless or would take
10558 three insns anyway. */
10559 *cost = COSTS_N_INSNS (3);
10560 break;
10561 }
10562 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10563 return true;
10564 }
10565 else
10566 {
10567 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10568 if (CONST_INT_P (XEXP (x, 1))
10569 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10570 {
10571 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10572 return true;
10573 }
10574
10575 return false;
10576 }
10577 }
10578 /* Not directly inside a set. If it involves the condition code
10579 register, it must be the condition for a branch, cond_exec or
10580 I_T_E operation. Since the comparison is performed elsewhere,
10581 this is just the control part, which has no additional
10582 cost. */
10583 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10584 && XEXP (x, 1) == const0_rtx)
10585 {
10586 *cost = 0;
10587 return true;
10588 }
10589 return false;
10590
10591 case ABS:
10592 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10593 && (mode == SFmode || !TARGET_VFP_SINGLE))
10594 {
10595 if (speed_p)
10596 *cost += extra_cost->fp[mode != SFmode].neg;
10597
10598 return false;
10599 }
10600 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10601 {
10602 *cost = LIBCALL_COST (1);
10603 return false;
10604 }
10605
10606 if (mode == SImode)
10607 {
10608 if (speed_p)
10609 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10610 return false;
10611 }
10612 /* Vector mode? */
10613 *cost = LIBCALL_COST (1);
10614 return false;
10615
10616 case SIGN_EXTEND:
10617 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10618 && MEM_P (XEXP (x, 0)))
10619 {
10620 if (mode == DImode)
10621 *cost += COSTS_N_INSNS (1);
10622
10623 if (!speed_p)
10624 return true;
10625
10626 if (GET_MODE (XEXP (x, 0)) == SImode)
10627 *cost += extra_cost->ldst.load;
10628 else
10629 *cost += extra_cost->ldst.load_sign_extend;
10630
10631 if (mode == DImode)
10632 *cost += extra_cost->alu.shift;
10633
10634 return true;
10635 }
10636
10637 /* Widening from less than 32 bits requires an extend operation. */
10638 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10639 {
10640 /* We have SXTB/SXTH. */
10641 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10642 if (speed_p)
10643 *cost += extra_cost->alu.extend;
10644 }
10645 else if (GET_MODE (XEXP (x, 0)) != SImode)
10646 {
10647 /* Needs two shifts. */
10648 *cost += COSTS_N_INSNS (1);
10649 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10650 if (speed_p)
10651 *cost += 2 * extra_cost->alu.shift;
10652 }
10653
10654 /* Widening beyond 32-bits requires one more insn. */
10655 if (mode == DImode)
10656 {
10657 *cost += COSTS_N_INSNS (1);
10658 if (speed_p)
10659 *cost += extra_cost->alu.shift;
10660 }
10661
10662 return true;
10663
10664 case ZERO_EXTEND:
10665 if ((arm_arch4
10666 || GET_MODE (XEXP (x, 0)) == SImode
10667 || GET_MODE (XEXP (x, 0)) == QImode)
10668 && MEM_P (XEXP (x, 0)))
10669 {
10670 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10671
10672 if (mode == DImode)
10673 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10674
10675 return true;
10676 }
10677
10678 /* Widening from less than 32-bits requires an extend operation. */
10679 if (GET_MODE (XEXP (x, 0)) == QImode)
10680 {
10681 /* UXTB can be a shorter instruction in Thumb2, but it might
10682 be slower than the AND Rd, Rn, #255 alternative. When
10683 optimizing for speed it should never be slower to use
10684 AND, and we don't really model 16-bit vs 32-bit insns
10685 here. */
10686 if (speed_p)
10687 *cost += extra_cost->alu.logical;
10688 }
10689 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10690 {
10691 /* We have UXTB/UXTH. */
10692 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10693 if (speed_p)
10694 *cost += extra_cost->alu.extend;
10695 }
10696 else if (GET_MODE (XEXP (x, 0)) != SImode)
10697 {
10698 /* Needs two shifts. It's marginally preferable to use
10699 shifts rather than two BIC instructions as the second
10700 shift may merge with a subsequent insn as a shifter
10701 op. */
10702 *cost = COSTS_N_INSNS (2);
10703 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10704 if (speed_p)
10705 *cost += 2 * extra_cost->alu.shift;
10706 }
10707
10708 /* Widening beyond 32-bits requires one more insn. */
10709 if (mode == DImode)
10710 {
10711 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10712 }
10713
10714 return true;
10715
10716 case CONST_INT:
10717 *cost = 0;
10718 /* CONST_INT has no mode, so we cannot tell for sure how many
10719 insns are really going to be needed. The best we can do is
10720 look at the value passed. If it fits in SImode, then assume
10721 that's the mode it will be used for. Otherwise assume it
10722 will be used in DImode. */
10723 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10724 mode = SImode;
10725 else
10726 mode = DImode;
10727
10728 /* Avoid blowing up in arm_gen_constant (). */
10729 if (!(outer_code == PLUS
10730 || outer_code == AND
10731 || outer_code == IOR
10732 || outer_code == XOR
10733 || outer_code == MINUS))
10734 outer_code = SET;
10735
10736 const_int_cost:
10737 if (mode == SImode)
10738 {
10739 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10740 INTVAL (x), NULL, NULL,
10741 0, 0));
10742 /* Extra costs? */
10743 }
10744 else
10745 {
10746 *cost += COSTS_N_INSNS (arm_gen_constant
10747 (outer_code, SImode, NULL,
10748 trunc_int_for_mode (INTVAL (x), SImode),
10749 NULL, NULL, 0, 0)
10750 + arm_gen_constant (outer_code, SImode, NULL,
10751 INTVAL (x) >> 32, NULL,
10752 NULL, 0, 0));
10753 /* Extra costs? */
10754 }
10755
10756 return true;
10757
10758 case CONST:
10759 case LABEL_REF:
10760 case SYMBOL_REF:
10761 if (speed_p)
10762 {
10763 if (arm_arch_thumb2 && !flag_pic)
10764 *cost += COSTS_N_INSNS (1);
10765 else
10766 *cost += extra_cost->ldst.load;
10767 }
10768 else
10769 *cost += COSTS_N_INSNS (1);
10770
10771 if (flag_pic)
10772 {
10773 *cost += COSTS_N_INSNS (1);
10774 if (speed_p)
10775 *cost += extra_cost->alu.arith;
10776 }
10777
10778 return true;
10779
10780 case CONST_FIXED:
10781 *cost = COSTS_N_INSNS (4);
10782 /* Fixme. */
10783 return true;
10784
10785 case CONST_DOUBLE:
10786 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10787 && (mode == SFmode || !TARGET_VFP_SINGLE))
10788 {
10789 if (vfp3_const_double_rtx (x))
10790 {
10791 if (speed_p)
10792 *cost += extra_cost->fp[mode == DFmode].fpconst;
10793 return true;
10794 }
10795
10796 if (speed_p)
10797 {
10798 if (mode == DFmode)
10799 *cost += extra_cost->ldst.loadd;
10800 else
10801 *cost += extra_cost->ldst.loadf;
10802 }
10803 else
10804 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10805
10806 return true;
10807 }
10808 *cost = COSTS_N_INSNS (4);
10809 return true;
10810
10811 case CONST_VECTOR:
10812 /* Fixme. */
10813 if (TARGET_NEON
10814 && TARGET_HARD_FLOAT
10815 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10816 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10817 *cost = COSTS_N_INSNS (1);
10818 else
10819 *cost = COSTS_N_INSNS (4);
10820 return true;
10821
10822 case HIGH:
10823 case LO_SUM:
10824 /* When optimizing for size, we prefer constant pool entries to
10825 MOVW/MOVT pairs, so bump the cost of these slightly. */
10826 if (!speed_p)
10827 *cost += 1;
10828 return true;
10829
10830 case CLZ:
10831 if (speed_p)
10832 *cost += extra_cost->alu.clz;
10833 return false;
10834
10835 case SMIN:
10836 if (XEXP (x, 1) == const0_rtx)
10837 {
10838 if (speed_p)
10839 *cost += extra_cost->alu.log_shift;
10840 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10841 return true;
10842 }
10843 /* Fall through. */
10844 case SMAX:
10845 case UMIN:
10846 case UMAX:
10847 *cost += COSTS_N_INSNS (1);
10848 return false;
10849
10850 case TRUNCATE:
10851 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10852 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10853 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10854 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10855 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10856 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10857 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10858 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10859 == ZERO_EXTEND))))
10860 {
10861 if (speed_p)
10862 *cost += extra_cost->mult[1].extend;
10863 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10864 ZERO_EXTEND, 0, speed_p)
10865 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10866 ZERO_EXTEND, 0, speed_p));
10867 return true;
10868 }
10869 *cost = LIBCALL_COST (1);
10870 return false;
10871
10872 case UNSPEC_VOLATILE:
10873 case UNSPEC:
10874 return arm_unspec_cost (x, outer_code, speed_p, cost);
10875
10876 case PC:
10877 /* Reading the PC is like reading any other register. Writing it
10878 is more expensive, but we take that into account elsewhere. */
10879 *cost = 0;
10880 return true;
10881
10882 case ZERO_EXTRACT:
10883 /* TODO: Simple zero_extract of bottom bits using AND. */
10884 /* Fall through. */
10885 case SIGN_EXTRACT:
10886 if (arm_arch6
10887 && mode == SImode
10888 && CONST_INT_P (XEXP (x, 1))
10889 && CONST_INT_P (XEXP (x, 2)))
10890 {
10891 if (speed_p)
10892 *cost += extra_cost->alu.bfx;
10893 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10894 return true;
10895 }
10896 /* Without UBFX/SBFX, need to resort to shift operations. */
10897 *cost += COSTS_N_INSNS (1);
10898 if (speed_p)
10899 *cost += 2 * extra_cost->alu.shift;
10900 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10901 return true;
10902
10903 case FLOAT_EXTEND:
10904 if (TARGET_HARD_FLOAT)
10905 {
10906 if (speed_p)
10907 *cost += extra_cost->fp[mode == DFmode].widen;
10908 if (!TARGET_VFP5
10909 && GET_MODE (XEXP (x, 0)) == HFmode)
10910 {
10911 /* Pre v8, widening HF->DF is a two-step process, first
10912 widening to SFmode. */
10913 *cost += COSTS_N_INSNS (1);
10914 if (speed_p)
10915 *cost += extra_cost->fp[0].widen;
10916 }
10917 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10918 return true;
10919 }
10920
10921 *cost = LIBCALL_COST (1);
10922 return false;
10923
10924 case FLOAT_TRUNCATE:
10925 if (TARGET_HARD_FLOAT)
10926 {
10927 if (speed_p)
10928 *cost += extra_cost->fp[mode == DFmode].narrow;
10929 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10930 return true;
10931 /* Vector modes? */
10932 }
10933 *cost = LIBCALL_COST (1);
10934 return false;
10935
10936 case FMA:
10937 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10938 {
10939 rtx op0 = XEXP (x, 0);
10940 rtx op1 = XEXP (x, 1);
10941 rtx op2 = XEXP (x, 2);
10942
10943
10944 /* vfms or vfnma. */
10945 if (GET_CODE (op0) == NEG)
10946 op0 = XEXP (op0, 0);
10947
10948 /* vfnms or vfnma. */
10949 if (GET_CODE (op2) == NEG)
10950 op2 = XEXP (op2, 0);
10951
10952 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10953 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10954 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10955
10956 if (speed_p)
10957 *cost += extra_cost->fp[mode == DFmode].fma;
10958
10959 return true;
10960 }
10961
10962 *cost = LIBCALL_COST (3);
10963 return false;
10964
10965 case FIX:
10966 case UNSIGNED_FIX:
10967 if (TARGET_HARD_FLOAT)
10968 {
10969 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10970 a vcvt fixed-point conversion. */
10971 if (code == FIX && mode == SImode
10972 && GET_CODE (XEXP (x, 0)) == FIX
10973 && GET_MODE (XEXP (x, 0)) == SFmode
10974 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10975 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10976 > 0)
10977 {
10978 if (speed_p)
10979 *cost += extra_cost->fp[0].toint;
10980
10981 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10982 code, 0, speed_p);
10983 return true;
10984 }
10985
10986 if (GET_MODE_CLASS (mode) == MODE_INT)
10987 {
10988 mode = GET_MODE (XEXP (x, 0));
10989 if (speed_p)
10990 *cost += extra_cost->fp[mode == DFmode].toint;
10991 /* Strip off the 'cost' of rounding towards zero. */
10992 if (GET_CODE (XEXP (x, 0)) == FIX)
10993 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10994 0, speed_p);
10995 else
10996 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10997 /* ??? Increase the cost to deal with transferring from
10998 FP -> CORE registers? */
10999 return true;
11000 }
11001 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11002 && TARGET_VFP5)
11003 {
11004 if (speed_p)
11005 *cost += extra_cost->fp[mode == DFmode].roundint;
11006 return false;
11007 }
11008 /* Vector costs? */
11009 }
11010 *cost = LIBCALL_COST (1);
11011 return false;
11012
11013 case FLOAT:
11014 case UNSIGNED_FLOAT:
11015 if (TARGET_HARD_FLOAT)
11016 {
11017 /* ??? Increase the cost to deal with transferring from CORE
11018 -> FP registers? */
11019 if (speed_p)
11020 *cost += extra_cost->fp[mode == DFmode].fromint;
11021 return false;
11022 }
11023 *cost = LIBCALL_COST (1);
11024 return false;
11025
11026 case CALL:
11027 return true;
11028
11029 case ASM_OPERANDS:
11030 {
11031 /* Just a guess. Guess number of instructions in the asm
11032 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11033 though (see PR60663). */
11034 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11035 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11036
11037 *cost = COSTS_N_INSNS (asm_length + num_operands);
11038 return true;
11039 }
11040 default:
11041 if (mode != VOIDmode)
11042 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11043 else
11044 *cost = COSTS_N_INSNS (4); /* Who knows? */
11045 return false;
11046 }
11047 }
11048
11049 #undef HANDLE_NARROW_SHIFT_ARITH
11050
11051 /* RTX costs entry point. */
11052
11053 static bool
11054 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11055 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11056 {
11057 bool result;
11058 int code = GET_CODE (x);
11059 gcc_assert (current_tune->insn_extra_cost);
11060
11061 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11062 (enum rtx_code) outer_code,
11063 current_tune->insn_extra_cost,
11064 total, speed);
11065
11066 if (dump_file && arm_verbose_cost)
11067 {
11068 print_rtl_single (dump_file, x);
11069 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11070 *total, result ? "final" : "partial");
11071 }
11072 return result;
11073 }
11074
11075 /* All address computations that can be done are free, but rtx cost returns
11076 the same for practically all of them. So we weight the different types
11077 of address here in the order (most pref first):
11078 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11079 static inline int
11080 arm_arm_address_cost (rtx x)
11081 {
11082 enum rtx_code c = GET_CODE (x);
11083
11084 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11085 return 0;
11086 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11087 return 10;
11088
11089 if (c == PLUS)
11090 {
11091 if (CONST_INT_P (XEXP (x, 1)))
11092 return 2;
11093
11094 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11095 return 3;
11096
11097 return 4;
11098 }
11099
11100 return 6;
11101 }
11102
11103 static inline int
11104 arm_thumb_address_cost (rtx x)
11105 {
11106 enum rtx_code c = GET_CODE (x);
11107
11108 if (c == REG)
11109 return 1;
11110 if (c == PLUS
11111 && REG_P (XEXP (x, 0))
11112 && CONST_INT_P (XEXP (x, 1)))
11113 return 1;
11114
11115 return 2;
11116 }
11117
11118 static int
11119 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11120 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11121 {
11122 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11123 }
11124
11125 /* Adjust cost hook for XScale. */
11126 static bool
11127 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11128 int * cost)
11129 {
11130 /* Some true dependencies can have a higher cost depending
11131 on precisely how certain input operands are used. */
11132 if (dep_type == 0
11133 && recog_memoized (insn) >= 0
11134 && recog_memoized (dep) >= 0)
11135 {
11136 int shift_opnum = get_attr_shift (insn);
11137 enum attr_type attr_type = get_attr_type (dep);
11138
11139 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11140 operand for INSN. If we have a shifted input operand and the
11141 instruction we depend on is another ALU instruction, then we may
11142 have to account for an additional stall. */
11143 if (shift_opnum != 0
11144 && (attr_type == TYPE_ALU_SHIFT_IMM
11145 || attr_type == TYPE_ALUS_SHIFT_IMM
11146 || attr_type == TYPE_LOGIC_SHIFT_IMM
11147 || attr_type == TYPE_LOGICS_SHIFT_IMM
11148 || attr_type == TYPE_ALU_SHIFT_REG
11149 || attr_type == TYPE_ALUS_SHIFT_REG
11150 || attr_type == TYPE_LOGIC_SHIFT_REG
11151 || attr_type == TYPE_LOGICS_SHIFT_REG
11152 || attr_type == TYPE_MOV_SHIFT
11153 || attr_type == TYPE_MVN_SHIFT
11154 || attr_type == TYPE_MOV_SHIFT_REG
11155 || attr_type == TYPE_MVN_SHIFT_REG))
11156 {
11157 rtx shifted_operand;
11158 int opno;
11159
11160 /* Get the shifted operand. */
11161 extract_insn (insn);
11162 shifted_operand = recog_data.operand[shift_opnum];
11163
11164 /* Iterate over all the operands in DEP. If we write an operand
11165 that overlaps with SHIFTED_OPERAND, then we have to increase the
11166 cost of this dependency. */
11167 extract_insn (dep);
11168 preprocess_constraints (dep);
11169 for (opno = 0; opno < recog_data.n_operands; opno++)
11170 {
11171 /* We can ignore strict inputs. */
11172 if (recog_data.operand_type[opno] == OP_IN)
11173 continue;
11174
11175 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11176 shifted_operand))
11177 {
11178 *cost = 2;
11179 return false;
11180 }
11181 }
11182 }
11183 }
11184 return true;
11185 }
11186
11187 /* Adjust cost hook for Cortex A9. */
11188 static bool
11189 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11190 int * cost)
11191 {
11192 switch (dep_type)
11193 {
11194 case REG_DEP_ANTI:
11195 *cost = 0;
11196 return false;
11197
11198 case REG_DEP_TRUE:
11199 case REG_DEP_OUTPUT:
11200 if (recog_memoized (insn) >= 0
11201 && recog_memoized (dep) >= 0)
11202 {
11203 if (GET_CODE (PATTERN (insn)) == SET)
11204 {
11205 if (GET_MODE_CLASS
11206 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11207 || GET_MODE_CLASS
11208 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11209 {
11210 enum attr_type attr_type_insn = get_attr_type (insn);
11211 enum attr_type attr_type_dep = get_attr_type (dep);
11212
11213 /* By default all dependencies of the form
11214 s0 = s0 <op> s1
11215 s0 = s0 <op> s2
11216 have an extra latency of 1 cycle because
11217 of the input and output dependency in this
11218 case. However, this gets modeled as a true
11219 dependency and hence all these checks. */
11220 if (REG_P (SET_DEST (PATTERN (insn)))
11221 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11222 {
11223 /* FMACS is a special case where the dependent
11224 instruction can be issued 3 cycles before
11225 the normal latency in case of an output
11226 dependency. */
11227 if ((attr_type_insn == TYPE_FMACS
11228 || attr_type_insn == TYPE_FMACD)
11229 && (attr_type_dep == TYPE_FMACS
11230 || attr_type_dep == TYPE_FMACD))
11231 {
11232 if (dep_type == REG_DEP_OUTPUT)
11233 *cost = insn_default_latency (dep) - 3;
11234 else
11235 *cost = insn_default_latency (dep);
11236 return false;
11237 }
11238 else
11239 {
11240 if (dep_type == REG_DEP_OUTPUT)
11241 *cost = insn_default_latency (dep) + 1;
11242 else
11243 *cost = insn_default_latency (dep);
11244 }
11245 return false;
11246 }
11247 }
11248 }
11249 }
11250 break;
11251
11252 default:
11253 gcc_unreachable ();
11254 }
11255
11256 return true;
11257 }
11258
11259 /* Adjust cost hook for FA726TE. */
11260 static bool
11261 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11262 int * cost)
11263 {
11264 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11265 has a penalty of 3 cycles. */
11266 if (dep_type == REG_DEP_TRUE
11267 && recog_memoized (insn) >= 0
11268 && recog_memoized (dep) >= 0
11269 && get_attr_conds (dep) == CONDS_SET)
11270 {
11271 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11272 if (get_attr_conds (insn) == CONDS_USE
11273 && get_attr_type (insn) != TYPE_BRANCH)
11274 {
11275 *cost = 3;
11276 return false;
11277 }
11278
11279 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11280 || get_attr_conds (insn) == CONDS_USE)
11281 {
11282 *cost = 0;
11283 return false;
11284 }
11285 }
11286
11287 return true;
11288 }
11289
11290 /* Implement TARGET_REGISTER_MOVE_COST.
11291
11292 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11293 such a move is typically more expensive than a single memory access. We set
11294 the cost to less than two memory accesses so that floating
11295 point to integer conversion does not go through memory. */
11296
11297 int
11298 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11299 reg_class_t from, reg_class_t to)
11300 {
11301 if (TARGET_32BIT)
11302 {
11303 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11304 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11305 return 15;
11306 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11307 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11308 return 4;
11309 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11310 return 20;
11311 else
11312 return 2;
11313 }
11314 else
11315 {
11316 if (from == HI_REGS || to == HI_REGS)
11317 return 4;
11318 else
11319 return 2;
11320 }
11321 }
11322
11323 /* Implement TARGET_MEMORY_MOVE_COST. */
11324
11325 int
11326 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11327 bool in ATTRIBUTE_UNUSED)
11328 {
11329 if (TARGET_32BIT)
11330 return 10;
11331 else
11332 {
11333 if (GET_MODE_SIZE (mode) < 4)
11334 return 8;
11335 else
11336 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11337 }
11338 }
11339
11340 /* Vectorizer cost model implementation. */
11341
11342 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11343 static int
11344 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11345 tree vectype,
11346 int misalign ATTRIBUTE_UNUSED)
11347 {
11348 unsigned elements;
11349
11350 switch (type_of_cost)
11351 {
11352 case scalar_stmt:
11353 return current_tune->vec_costs->scalar_stmt_cost;
11354
11355 case scalar_load:
11356 return current_tune->vec_costs->scalar_load_cost;
11357
11358 case scalar_store:
11359 return current_tune->vec_costs->scalar_store_cost;
11360
11361 case vector_stmt:
11362 return current_tune->vec_costs->vec_stmt_cost;
11363
11364 case vector_load:
11365 return current_tune->vec_costs->vec_align_load_cost;
11366
11367 case vector_store:
11368 return current_tune->vec_costs->vec_store_cost;
11369
11370 case vec_to_scalar:
11371 return current_tune->vec_costs->vec_to_scalar_cost;
11372
11373 case scalar_to_vec:
11374 return current_tune->vec_costs->scalar_to_vec_cost;
11375
11376 case unaligned_load:
11377 case vector_gather_load:
11378 return current_tune->vec_costs->vec_unalign_load_cost;
11379
11380 case unaligned_store:
11381 case vector_scatter_store:
11382 return current_tune->vec_costs->vec_unalign_store_cost;
11383
11384 case cond_branch_taken:
11385 return current_tune->vec_costs->cond_taken_branch_cost;
11386
11387 case cond_branch_not_taken:
11388 return current_tune->vec_costs->cond_not_taken_branch_cost;
11389
11390 case vec_perm:
11391 case vec_promote_demote:
11392 return current_tune->vec_costs->vec_stmt_cost;
11393
11394 case vec_construct:
11395 elements = TYPE_VECTOR_SUBPARTS (vectype);
11396 return elements / 2 + 1;
11397
11398 default:
11399 gcc_unreachable ();
11400 }
11401 }
11402
11403 /* Implement targetm.vectorize.add_stmt_cost. */
11404
11405 static unsigned
11406 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11407 struct _stmt_vec_info *stmt_info, int misalign,
11408 enum vect_cost_model_location where)
11409 {
11410 unsigned *cost = (unsigned *) data;
11411 unsigned retval = 0;
11412
11413 if (flag_vect_cost_model)
11414 {
11415 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11416 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11417
11418 /* Statements in an inner loop relative to the loop being
11419 vectorized are weighted more heavily. The value here is
11420 arbitrary and could potentially be improved with analysis. */
11421 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11422 count *= 50; /* FIXME. */
11423
11424 retval = (unsigned) (count * stmt_cost);
11425 cost[where] += retval;
11426 }
11427
11428 return retval;
11429 }
11430
11431 /* Return true if and only if this insn can dual-issue only as older. */
11432 static bool
11433 cortexa7_older_only (rtx_insn *insn)
11434 {
11435 if (recog_memoized (insn) < 0)
11436 return false;
11437
11438 switch (get_attr_type (insn))
11439 {
11440 case TYPE_ALU_DSP_REG:
11441 case TYPE_ALU_SREG:
11442 case TYPE_ALUS_SREG:
11443 case TYPE_LOGIC_REG:
11444 case TYPE_LOGICS_REG:
11445 case TYPE_ADC_REG:
11446 case TYPE_ADCS_REG:
11447 case TYPE_ADR:
11448 case TYPE_BFM:
11449 case TYPE_REV:
11450 case TYPE_MVN_REG:
11451 case TYPE_SHIFT_IMM:
11452 case TYPE_SHIFT_REG:
11453 case TYPE_LOAD_BYTE:
11454 case TYPE_LOAD_4:
11455 case TYPE_STORE_4:
11456 case TYPE_FFARITHS:
11457 case TYPE_FADDS:
11458 case TYPE_FFARITHD:
11459 case TYPE_FADDD:
11460 case TYPE_FMOV:
11461 case TYPE_F_CVT:
11462 case TYPE_FCMPS:
11463 case TYPE_FCMPD:
11464 case TYPE_FCONSTS:
11465 case TYPE_FCONSTD:
11466 case TYPE_FMULS:
11467 case TYPE_FMACS:
11468 case TYPE_FMULD:
11469 case TYPE_FMACD:
11470 case TYPE_FDIVS:
11471 case TYPE_FDIVD:
11472 case TYPE_F_MRC:
11473 case TYPE_F_MRRC:
11474 case TYPE_F_FLAG:
11475 case TYPE_F_LOADS:
11476 case TYPE_F_STORES:
11477 return true;
11478 default:
11479 return false;
11480 }
11481 }
11482
11483 /* Return true if and only if this insn can dual-issue as younger. */
11484 static bool
11485 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11486 {
11487 if (recog_memoized (insn) < 0)
11488 {
11489 if (verbose > 5)
11490 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11491 return false;
11492 }
11493
11494 switch (get_attr_type (insn))
11495 {
11496 case TYPE_ALU_IMM:
11497 case TYPE_ALUS_IMM:
11498 case TYPE_LOGIC_IMM:
11499 case TYPE_LOGICS_IMM:
11500 case TYPE_EXTEND:
11501 case TYPE_MVN_IMM:
11502 case TYPE_MOV_IMM:
11503 case TYPE_MOV_REG:
11504 case TYPE_MOV_SHIFT:
11505 case TYPE_MOV_SHIFT_REG:
11506 case TYPE_BRANCH:
11507 case TYPE_CALL:
11508 return true;
11509 default:
11510 return false;
11511 }
11512 }
11513
11514
11515 /* Look for an instruction that can dual issue only as an older
11516 instruction, and move it in front of any instructions that can
11517 dual-issue as younger, while preserving the relative order of all
11518 other instructions in the ready list. This is a heuristic to help
11519 dual-issue in later cycles, by postponing issue of more flexible
11520 instructions. This heuristic may affect dual issue opportunities
11521 in the current cycle. */
11522 static void
11523 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11524 int *n_readyp, int clock)
11525 {
11526 int i;
11527 int first_older_only = -1, first_younger = -1;
11528
11529 if (verbose > 5)
11530 fprintf (file,
11531 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11532 clock,
11533 *n_readyp);
11534
11535 /* Traverse the ready list from the head (the instruction to issue
11536 first), looking for the first instruction that can issue as
11537 younger and the first instruction that can dual-issue only as
11538 older. */
11539 for (i = *n_readyp - 1; i >= 0; i--)
11540 {
11541 rtx_insn *insn = ready[i];
11542 if (cortexa7_older_only (insn))
11543 {
11544 first_older_only = i;
11545 if (verbose > 5)
11546 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11547 break;
11548 }
11549 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11550 first_younger = i;
11551 }
11552
11553 /* Nothing to reorder because either no younger insn was found or an insn
11554 that can dual-issue only as older appears before any insn that
11555 can dual-issue as younger. */
11556 if (first_younger == -1)
11557 {
11558 if (verbose > 5)
11559 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11560 return;
11561 }
11562
11563 /* Nothing to reorder because no older-only insn in the ready list. */
11564 if (first_older_only == -1)
11565 {
11566 if (verbose > 5)
11567 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11568 return;
11569 }
11570
11571 /* Move first_older_only insn before first_younger. */
11572 if (verbose > 5)
11573 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11574 INSN_UID (ready[first_older_only]),
11575 INSN_UID (ready[first_younger]));
11576 rtx_insn *first_older_only_insn = ready[first_older_only];
11577 for (i = first_older_only; i < first_younger; i++)
11578 {
11579 ready[i] = ready[i+1];
11580 }
11581
11582 ready[i] = first_older_only_insn;
11583 return;
11584 }
11585
11586 /* Implement TARGET_SCHED_REORDER. */
11587 static int
11588 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11589 int clock)
11590 {
11591 switch (arm_tune)
11592 {
11593 case TARGET_CPU_cortexa7:
11594 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11595 break;
11596 default:
11597 /* Do nothing for other cores. */
11598 break;
11599 }
11600
11601 return arm_issue_rate ();
11602 }
11603
11604 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11605 It corrects the value of COST based on the relationship between
11606 INSN and DEP through the dependence LINK. It returns the new
11607 value. There is a per-core adjust_cost hook to adjust scheduler costs
11608 and the per-core hook can choose to completely override the generic
11609 adjust_cost function. Only put bits of code into arm_adjust_cost that
11610 are common across all cores. */
11611 static int
11612 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11613 unsigned int)
11614 {
11615 rtx i_pat, d_pat;
11616
11617 /* When generating Thumb-1 code, we want to place flag-setting operations
11618 close to a conditional branch which depends on them, so that we can
11619 omit the comparison. */
11620 if (TARGET_THUMB1
11621 && dep_type == 0
11622 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11623 && recog_memoized (dep) >= 0
11624 && get_attr_conds (dep) == CONDS_SET)
11625 return 0;
11626
11627 if (current_tune->sched_adjust_cost != NULL)
11628 {
11629 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11630 return cost;
11631 }
11632
11633 /* XXX Is this strictly true? */
11634 if (dep_type == REG_DEP_ANTI
11635 || dep_type == REG_DEP_OUTPUT)
11636 return 0;
11637
11638 /* Call insns don't incur a stall, even if they follow a load. */
11639 if (dep_type == 0
11640 && CALL_P (insn))
11641 return 1;
11642
11643 if ((i_pat = single_set (insn)) != NULL
11644 && MEM_P (SET_SRC (i_pat))
11645 && (d_pat = single_set (dep)) != NULL
11646 && MEM_P (SET_DEST (d_pat)))
11647 {
11648 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11649 /* This is a load after a store; there is no conflict if the load reads
11650 from a cached area. Assume that loads from the stack and from the
11651 constant pool are cached, and that others will miss. This is a
11652 hack. */
11653
11654 if ((GET_CODE (src_mem) == SYMBOL_REF
11655 && CONSTANT_POOL_ADDRESS_P (src_mem))
11656 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11657 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11658 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11659 return 1;
11660 }
11661
11662 return cost;
11663 }
11664
11665 int
11666 arm_max_conditional_execute (void)
11667 {
11668 return max_insns_skipped;
11669 }
11670
11671 static int
11672 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11673 {
11674 if (TARGET_32BIT)
11675 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11676 else
11677 return (optimize > 0) ? 2 : 0;
11678 }
11679
11680 static int
11681 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11682 {
11683 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11684 }
11685
11686 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11687 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11688 sequences of non-executed instructions in IT blocks probably take the same
11689 amount of time as executed instructions (and the IT instruction itself takes
11690 space in icache). This function was experimentally determined to give good
11691 results on a popular embedded benchmark. */
11692
11693 static int
11694 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11695 {
11696 return (TARGET_32BIT && speed_p) ? 1
11697 : arm_default_branch_cost (speed_p, predictable_p);
11698 }
11699
11700 static int
11701 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11702 {
11703 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11704 }
11705
11706 static bool fp_consts_inited = false;
11707
11708 static REAL_VALUE_TYPE value_fp0;
11709
11710 static void
11711 init_fp_table (void)
11712 {
11713 REAL_VALUE_TYPE r;
11714
11715 r = REAL_VALUE_ATOF ("0", DFmode);
11716 value_fp0 = r;
11717 fp_consts_inited = true;
11718 }
11719
11720 /* Return TRUE if rtx X is a valid immediate FP constant. */
11721 int
11722 arm_const_double_rtx (rtx x)
11723 {
11724 const REAL_VALUE_TYPE *r;
11725
11726 if (!fp_consts_inited)
11727 init_fp_table ();
11728
11729 r = CONST_DOUBLE_REAL_VALUE (x);
11730 if (REAL_VALUE_MINUS_ZERO (*r))
11731 return 0;
11732
11733 if (real_equal (r, &value_fp0))
11734 return 1;
11735
11736 return 0;
11737 }
11738
11739 /* VFPv3 has a fairly wide range of representable immediates, formed from
11740 "quarter-precision" floating-point values. These can be evaluated using this
11741 formula (with ^ for exponentiation):
11742
11743 -1^s * n * 2^-r
11744
11745 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11746 16 <= n <= 31 and 0 <= r <= 7.
11747
11748 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11749
11750 - A (most-significant) is the sign bit.
11751 - BCD are the exponent (encoded as r XOR 3).
11752 - EFGH are the mantissa (encoded as n - 16).
11753 */
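/* As a worked example of the encoding above (purely illustrative): 1.0 can
   be written as -1^0 * 16 * 2^-4, so s = 0, n = 16 and r = 4. That encodes
   as A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = (16 - 16) = 0b0000, giving
   the 8-bit index 0b01110000 (0x70), which is the value returned below by
   vfp3_const_double_index for the constant 1.0. */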
11754
11755 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11756 fconst[sd] instruction, or -1 if X isn't suitable. */
11757 static int
11758 vfp3_const_double_index (rtx x)
11759 {
11760 REAL_VALUE_TYPE r, m;
11761 int sign, exponent;
11762 unsigned HOST_WIDE_INT mantissa, mant_hi;
11763 unsigned HOST_WIDE_INT mask;
11764 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11765 bool fail;
11766
11767 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11768 return -1;
11769
11770 r = *CONST_DOUBLE_REAL_VALUE (x);
11771
11772 /* We can't represent these things, so detect them first. */
11773 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11774 return -1;
11775
11776 /* Extract sign, exponent and mantissa. */
11777 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11778 r = real_value_abs (&r);
11779 exponent = REAL_EXP (&r);
11780 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11781 highest (sign) bit, with a fixed binary point at bit point_pos.
11782 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11783 bits for the mantissa, this may fail (low bits would be lost). */
11784 real_ldexp (&m, &r, point_pos - exponent);
11785 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11786 mantissa = w.elt (0);
11787 mant_hi = w.elt (1);
11788
11789 /* If there are bits set in the low part of the mantissa, we can't
11790 represent this value. */
11791 if (mantissa != 0)
11792 return -1;
11793
11794 /* Now make it so that mantissa contains the most-significant bits, and move
11795 the point_pos to indicate that the least-significant bits have been
11796 discarded. */
11797 point_pos -= HOST_BITS_PER_WIDE_INT;
11798 mantissa = mant_hi;
11799
11800 /* We can permit four significant bits of mantissa only, plus a high bit
11801 which is always 1. */
11802 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11803 if ((mantissa & mask) != 0)
11804 return -1;
11805
11806 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11807 mantissa >>= point_pos - 5;
11808
11809 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11810 floating-point immediate zero with Neon using an integer-zero load, but
11811 that case is handled elsewhere.) */
11812 if (mantissa == 0)
11813 return -1;
11814
11815 gcc_assert (mantissa >= 16 && mantissa <= 31);
11816
11817 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11818 normalized significands are in the range [1, 2). (Our mantissa is shifted
11819 left 4 places at this point relative to normalized IEEE754 values). GCC
11820 internally uses [0.5, 1) (see real.c), so the exponent returned from
11821 REAL_EXP must be altered. */
11822 exponent = 5 - exponent;
11823
11824 if (exponent < 0 || exponent > 7)
11825 return -1;
11826
11827 /* Sign, mantissa and exponent are now in the correct form to plug into the
11828 formula described in the comment above. */
11829 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11830 }
11831
11832 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11833 int
11834 vfp3_const_double_rtx (rtx x)
11835 {
11836 if (!TARGET_VFP3)
11837 return 0;
11838
11839 return vfp3_const_double_index (x) != -1;
11840 }
11841
11842 /* Recognize immediates which can be used in various Neon instructions. Legal
11843 immediates are described by the following table (for VMVN variants, the
11844 bitwise inverse of the constant shown is recognized. In either case, VMOV
11845 is output and the correct instruction to use for a given constant is chosen
11846 by the assembler). The constant shown is replicated across all elements of
11847 the destination vector.
11848
11849 insn elems variant constant (binary)
11850 ---- ----- ------- -----------------
11851 vmov i32 0 00000000 00000000 00000000 abcdefgh
11852 vmov i32 1 00000000 00000000 abcdefgh 00000000
11853 vmov i32 2 00000000 abcdefgh 00000000 00000000
11854 vmov i32 3 abcdefgh 00000000 00000000 00000000
11855 vmov i16 4 00000000 abcdefgh
11856 vmov i16 5 abcdefgh 00000000
11857 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11858 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11859 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11860 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11861 vmvn i16 10 00000000 abcdefgh
11862 vmvn i16 11 abcdefgh 00000000
11863 vmov i32 12 00000000 00000000 abcdefgh 11111111
11864 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11865 vmov i32 14 00000000 abcdefgh 11111111 11111111
11866 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11867 vmov i8 16 abcdefgh
11868 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11869 eeeeeeee ffffffff gggggggg hhhhhhhh
11870 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11871 vmov f32 19 00000000 00000000 00000000 00000000
11872
11873 For case 18, B = !b. Representable values are exactly those accepted by
11874 vfp3_const_double_index, but are output as floating-point numbers rather
11875 than indices.
11876
11877 For case 19, we will change it to vmov.i32 when assembling.
11878
11879 Variants 0-5 (inclusive) may also be used as immediates for the second
11880 operand of VORR/VBIC instructions.
11881
11882 The INVERSE argument causes the bitwise inverse of the given operand to be
11883 recognized instead (used for recognizing legal immediates for the VAND/VORN
11884 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11885 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11886 output, rather than the real insns vbic/vorr).
11887
11888 INVERSE makes no difference to the recognition of float vectors.
11889
11890 The return value is the variant of immediate as shown in the above table, or
11891 -1 if the given value doesn't match any of the listed patterns.
11892 */
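/* An illustrative example of the table above: a V2SImode constant with
   both elements equal to 0x0000ab00 splats to the little-endian byte
   pattern 00 ab 00 00 00 ab 00 00, which matches variant 1 with an element
   width of 32; *MODCONST is then set to 0x0000ab00. */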
11893 static int
11894 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11895 rtx *modconst, int *elementwidth)
11896 {
11897 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11898 matches = 1; \
11899 for (i = 0; i < idx; i += (STRIDE)) \
11900 if (!(TEST)) \
11901 matches = 0; \
11902 if (matches) \
11903 { \
11904 immtype = (CLASS); \
11905 elsize = (ELSIZE); \
11906 break; \
11907 }
11908
11909 unsigned int i, elsize = 0, idx = 0, n_elts;
11910 unsigned int innersize;
11911 unsigned char bytes[16];
11912 int immtype = -1, matches;
11913 unsigned int invmask = inverse ? 0xff : 0;
11914 bool vector = GET_CODE (op) == CONST_VECTOR;
11915
11916 if (vector)
11917 n_elts = CONST_VECTOR_NUNITS (op);
11918 else
11919 {
11920 n_elts = 1;
11921 if (mode == VOIDmode)
11922 mode = DImode;
11923 }
11924
11925 innersize = GET_MODE_UNIT_SIZE (mode);
11926
11927 /* Vectors of float constants. */
11928 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11929 {
11930 rtx el0 = CONST_VECTOR_ELT (op, 0);
11931
11932 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11933 return -1;
11934
11935 /* FP16 vectors cannot be represented. */
11936 if (GET_MODE_INNER (mode) == HFmode)
11937 return -1;
11938
11939 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11940 are distinct in this context. */
11941 if (!const_vec_duplicate_p (op))
11942 return -1;
11943
11944 if (modconst)
11945 *modconst = CONST_VECTOR_ELT (op, 0);
11946
11947 if (elementwidth)
11948 *elementwidth = 0;
11949
11950 if (el0 == CONST0_RTX (GET_MODE (el0)))
11951 return 19;
11952 else
11953 return 18;
11954 }
11955
11956 /* The tricks done in the code below apply for little-endian vector layout.
11957 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11958 FIXME: Implement logic for big-endian vectors. */
11959 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11960 return -1;
11961
11962 /* Splat vector constant out into a byte vector. */
11963 for (i = 0; i < n_elts; i++)
11964 {
11965 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11966 unsigned HOST_WIDE_INT elpart;
11967
11968 gcc_assert (CONST_INT_P (el));
11969 elpart = INTVAL (el);
11970
11971 for (unsigned int byte = 0; byte < innersize; byte++)
11972 {
11973 bytes[idx++] = (elpart & 0xff) ^ invmask;
11974 elpart >>= BITS_PER_UNIT;
11975 }
11976 }
11977
11978 /* Sanity check. */
11979 gcc_assert (idx == GET_MODE_SIZE (mode));
11980
11981 do
11982 {
11983 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11984 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11985
11986 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11987 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11988
11989 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11990 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11991
11992 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11993 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11994
11995 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11996
11997 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11998
11999 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12000 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12001
12002 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12003 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12004
12005 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12006 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12007
12008 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12009 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12010
12011 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12012
12013 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12014
12015 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12016 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12017
12018 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12019 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12020
12021 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12022 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12023
12024 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12025 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12026
12027 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12028
12029 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12030 && bytes[i] == bytes[(i + 8) % idx]);
12031 }
12032 while (0);
12033
12034 if (immtype == -1)
12035 return -1;
12036
12037 if (elementwidth)
12038 *elementwidth = elsize;
12039
12040 if (modconst)
12041 {
12042 unsigned HOST_WIDE_INT imm = 0;
12043
12044 /* Un-invert bytes of recognized vector, if necessary. */
12045 if (invmask != 0)
12046 for (i = 0; i < idx; i++)
12047 bytes[i] ^= invmask;
12048
12049 if (immtype == 17)
12050 {
12051 /* FIXME: Broken on 32-bit H_W_I hosts. */
12052 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12053
12054 for (i = 0; i < 8; i++)
12055 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12056 << (i * BITS_PER_UNIT);
12057
12058 *modconst = GEN_INT (imm);
12059 }
12060 else
12061 {
12062 unsigned HOST_WIDE_INT imm = 0;
12063
12064 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12065 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12066
12067 *modconst = GEN_INT (imm);
12068 }
12069 }
12070
12071 return immtype;
12072 #undef CHECK
12073 }
12074
12075 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12076 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12077 float elements), and a modified constant (whatever should be output for a
12078 VMOV) in *MODCONST. */
12079
12080 int
12081 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12082 rtx *modconst, int *elementwidth)
12083 {
12084 rtx tmpconst;
12085 int tmpwidth;
12086 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12087
12088 if (retval == -1)
12089 return 0;
12090
12091 if (modconst)
12092 *modconst = tmpconst;
12093
12094 if (elementwidth)
12095 *elementwidth = tmpwidth;
12096
12097 return 1;
12098 }
12099
12100 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12101 the immediate is valid, write a constant suitable for using as an operand
12102 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12103 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12104
12105 int
12106 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12107 rtx *modconst, int *elementwidth)
12108 {
12109 rtx tmpconst;
12110 int tmpwidth;
12111 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12112
12113 if (retval < 0 || retval > 5)
12114 return 0;
12115
12116 if (modconst)
12117 *modconst = tmpconst;
12118
12119 if (elementwidth)
12120 *elementwidth = tmpwidth;
12121
12122 return 1;
12123 }
12124
12125 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12126 the immediate is valid, write a constant suitable for using as an operand
12127 to VSHR/VSHL to *MODCONST and the corresponding element width to
12128 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left shift or a
12129 right shift, because the two have different limitations. */
12130
12131 int
12132 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12133 rtx *modconst, int *elementwidth,
12134 bool isleftshift)
12135 {
12136 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12137 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12138 unsigned HOST_WIDE_INT last_elt = 0;
12139 unsigned HOST_WIDE_INT maxshift;
12140
12141 /* All elements of the vector must hold the same shift amount; extract it. */
12142 for (i = 0; i < n_elts; i++)
12143 {
12144 rtx el = CONST_VECTOR_ELT (op, i);
12145 unsigned HOST_WIDE_INT elpart;
12146
12147 if (CONST_INT_P (el))
12148 elpart = INTVAL (el);
12149 else if (CONST_DOUBLE_P (el))
12150 return 0;
12151 else
12152 gcc_unreachable ();
12153
12154 if (i != 0 && elpart != last_elt)
12155 return 0;
12156
12157 last_elt = elpart;
12158 }
12159
12160 /* Shift less than element size. */
12161 maxshift = innersize * 8;
12162
12163 if (isleftshift)
12164 {
12165 /* Left shift immediate value can be from 0 to <size>-1. */
12166 if (last_elt >= maxshift)
12167 return 0;
12168 }
12169 else
12170 {
12171 /* Right shift immediate value can be from 1 to <size>. */
12172 if (last_elt == 0 || last_elt > maxshift)
12173 return 0;
12174 }
12175
12176 if (elementwidth)
12177 *elementwidth = innersize * 8;
12178
12179 if (modconst)
12180 *modconst = CONST_VECTOR_ELT (op, 0);
12181
12182 return 1;
12183 }
12184
12185 /* Return a string suitable for output of Neon immediate logic operation
12186 MNEM. */
12187
12188 char *
12189 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12190 int inverse, int quad)
12191 {
12192 int width, is_valid;
12193 static char templ[40];
12194
12195 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12196
12197 gcc_assert (is_valid != 0);
12198
12199 if (quad)
12200 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12201 else
12202 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12203
12204 return templ;
12205 }
12206
12207 /* Return a string suitable for output of Neon immediate shift operation
12208 (VSHR or VSHL) MNEM. */
12209
12210 char *
12211 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12212 machine_mode mode, int quad,
12213 bool isleftshift)
12214 {
12215 int width, is_valid;
12216 static char templ[40];
12217
12218 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12219 gcc_assert (is_valid != 0);
12220
12221 if (quad)
12222 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12223 else
12224 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12225
12226 return templ;
12227 }
12228
12229 /* Output a sequence of pairwise operations to implement a reduction.
12230 NOTE: We do "too much work" here, because pairwise operations work on two
12231 registers' worth of operands in one go. Unfortunately we don't think we can
12232 exploit those extra calculations to do the full operation in fewer steps.
12233 Although all vector elements of the result but the first are ignored, we
12234 actually calculate the same result in each of the elements. An alternative
12235 such as initially loading a vector with zero to use as each of the second
12236 operands would use up an additional register and take an extra instruction,
12237 for no particular gain. */
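/* A sketch of the data flow (illustrative register names): reducing a
   V4HImode value { a, b, c, d } with vpadd.i16 takes two steps:
     vpadd.i16 dT,   dIn, dIn  -> { a+b, c+d, a+b, c+d }
     vpadd.i16 dOut, dT,  dT   -> { a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d }
   so, as noted above, every element of the result holds the full reduction
   even though only the first is used. */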
12238
12239 void
12240 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12241 rtx (*reduc) (rtx, rtx, rtx))
12242 {
12243 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12244 rtx tmpsum = op1;
12245
12246 for (i = parts / 2; i >= 1; i /= 2)
12247 {
12248 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12249 emit_insn (reduc (dest, tmpsum, tmpsum));
12250 tmpsum = dest;
12251 }
12252 }
12253
12254 /* If VALS is a vector constant that can be loaded into a register
12255 using VDUP, generate instructions to do so and return an RTX to
12256 assign to the register. Otherwise return NULL_RTX. */
12257
12258 static rtx
12259 neon_vdup_constant (rtx vals)
12260 {
12261 machine_mode mode = GET_MODE (vals);
12262 machine_mode inner_mode = GET_MODE_INNER (mode);
12263 rtx x;
12264
12265 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12266 return NULL_RTX;
12267
12268 if (!const_vec_duplicate_p (vals, &x))
12269 /* The elements are not all the same. We could handle repeating
12270 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12271 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12272 vdup.i16). */
12273 return NULL_RTX;
12274
12275 /* We can load this constant by using VDUP and a constant in a
12276 single ARM register. This will be cheaper than a vector
12277 load. */
12278
12279 x = copy_to_mode_reg (inner_mode, x);
12280 return gen_vec_duplicate (mode, x);
12281 }
12282
12283 /* Generate code to load VALS, which is a PARALLEL containing only
12284 constants (for vec_init) or CONST_VECTOR, efficiently into a
12285 register. Returns an RTX to copy into the register, or NULL_RTX
12286 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12287
12288 rtx
12289 neon_make_constant (rtx vals)
12290 {
12291 machine_mode mode = GET_MODE (vals);
12292 rtx target;
12293 rtx const_vec = NULL_RTX;
12294 int n_elts = GET_MODE_NUNITS (mode);
12295 int n_const = 0;
12296 int i;
12297
12298 if (GET_CODE (vals) == CONST_VECTOR)
12299 const_vec = vals;
12300 else if (GET_CODE (vals) == PARALLEL)
12301 {
12302 /* A CONST_VECTOR must contain only CONST_INTs and
12303 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12304 Only store valid constants in a CONST_VECTOR. */
12305 for (i = 0; i < n_elts; ++i)
12306 {
12307 rtx x = XVECEXP (vals, 0, i);
12308 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12309 n_const++;
12310 }
12311 if (n_const == n_elts)
12312 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12313 }
12314 else
12315 gcc_unreachable ();
12316
12317 if (const_vec != NULL
12318 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12319 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12320 return const_vec;
12321 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12322 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12323 pipeline cycle; creating the constant takes one or two ARM
12324 pipeline cycles. */
12325 return target;
12326 else if (const_vec != NULL_RTX)
12327 /* Load from constant pool. On Cortex-A8 this takes two cycles
12328 (for either double or quad vectors). We can not take advantage
12329 of single-cycle VLD1 because we need a PC-relative addressing
12330 mode. */
12331 return const_vec;
12332 else
12333 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12334 We can not construct an initializer. */
12335 return NULL_RTX;
12336 }
12337
12338 /* Initialize vector TARGET to VALS. */
12339
12340 void
12341 neon_expand_vector_init (rtx target, rtx vals)
12342 {
12343 machine_mode mode = GET_MODE (target);
12344 machine_mode inner_mode = GET_MODE_INNER (mode);
12345 int n_elts = GET_MODE_NUNITS (mode);
12346 int n_var = 0, one_var = -1;
12347 bool all_same = true;
12348 rtx x, mem;
12349 int i;
12350
12351 for (i = 0; i < n_elts; ++i)
12352 {
12353 x = XVECEXP (vals, 0, i);
12354 if (!CONSTANT_P (x))
12355 ++n_var, one_var = i;
12356
12357 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12358 all_same = false;
12359 }
12360
12361 if (n_var == 0)
12362 {
12363 rtx constant = neon_make_constant (vals);
12364 if (constant != NULL_RTX)
12365 {
12366 emit_move_insn (target, constant);
12367 return;
12368 }
12369 }
12370
12371 /* Splat a single non-constant element if we can. */
12372 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12373 {
12374 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12375 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12376 return;
12377 }
12378
12379 /* One field is non-constant. Load constant then overwrite varying
12380 field. This is more efficient than using the stack. */
12381 if (n_var == 1)
12382 {
12383 rtx copy = copy_rtx (vals);
12384 rtx index = GEN_INT (one_var);
12385
12386 /* Load constant part of vector, substitute neighboring value for
12387 varying element. */
12388 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12389 neon_expand_vector_init (target, copy);
12390
12391 /* Insert variable. */
12392 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12393 switch (mode)
12394 {
12395 case E_V8QImode:
12396 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12397 break;
12398 case E_V16QImode:
12399 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12400 break;
12401 case E_V4HImode:
12402 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12403 break;
12404 case E_V8HImode:
12405 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12406 break;
12407 case E_V2SImode:
12408 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12409 break;
12410 case E_V4SImode:
12411 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12412 break;
12413 case E_V2SFmode:
12414 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12415 break;
12416 case E_V4SFmode:
12417 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12418 break;
12419 case E_V2DImode:
12420 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12421 break;
12422 default:
12423 gcc_unreachable ();
12424 }
12425 return;
12426 }
12427
12428 /* Construct the vector in memory one field at a time
12429 and load the whole vector. */
12430 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12431 for (i = 0; i < n_elts; i++)
12432 emit_move_insn (adjust_address_nv (mem, inner_mode,
12433 i * GET_MODE_SIZE (inner_mode)),
12434 XVECEXP (vals, 0, i));
12435 emit_move_insn (target, mem);
12436 }
12437
12438 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive), raising
12439 an error that uses DESC to describe the operand if it doesn't. EXP indicates
12440 the source location, which includes the inlining history for intrinsics. */
12441
12442 static void
12443 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12444 const_tree exp, const char *desc)
12445 {
12446 HOST_WIDE_INT lane;
12447
12448 gcc_assert (CONST_INT_P (operand));
12449
12450 lane = INTVAL (operand);
12451
12452 if (lane < low || lane >= high)
12453 {
12454 if (exp)
12455 error ("%K%s %wd out of range %wd - %wd",
12456 exp, desc, lane, low, high - 1);
12457 else
12458 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12459 }
12460 }
12461
12462 /* Bounds-check lanes. */
12463
12464 void
12465 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12466 const_tree exp)
12467 {
12468 bounds_check (operand, low, high, exp, "lane");
12469 }
12470
12471 /* Bounds-check constants. */
12472
12473 void
12474 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12475 {
12476 bounds_check (operand, low, high, NULL_TREE, "constant");
12477 }
12478
12479 HOST_WIDE_INT
12480 neon_element_bits (machine_mode mode)
12481 {
12482 return GET_MODE_UNIT_BITSIZE (mode);
12483 }
12484
12485 \f
12486 /* Predicates for `match_operand' and `match_operator'. */
12487
12488 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12489 WB is true if full writeback address modes are allowed, and false if
12490 only the limited writeback address modes (POST_INC and PRE_DEC) are
12491 allowed. */
12492
12493 int
12494 arm_coproc_mem_operand (rtx op, bool wb)
12495 {
12496 rtx ind;
12497
12498 /* Reject eliminable registers. */
12499 if (! (reload_in_progress || reload_completed || lra_in_progress)
12500 && ( reg_mentioned_p (frame_pointer_rtx, op)
12501 || reg_mentioned_p (arg_pointer_rtx, op)
12502 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12503 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12504 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12505 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12506 return FALSE;
12507
12508 /* Constants are converted into offsets from labels. */
12509 if (!MEM_P (op))
12510 return FALSE;
12511
12512 ind = XEXP (op, 0);
12513
12514 if (reload_completed
12515 && (GET_CODE (ind) == LABEL_REF
12516 || (GET_CODE (ind) == CONST
12517 && GET_CODE (XEXP (ind, 0)) == PLUS
12518 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12519 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12520 return TRUE;
12521
12522 /* Match: (mem (reg)). */
12523 if (REG_P (ind))
12524 return arm_address_register_rtx_p (ind, 0);
12525
12526 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12527 acceptable in any case (subject to verification by
12528 arm_address_register_rtx_p). We need WB to be true to accept
12529 PRE_INC and POST_DEC. */
12530 if (GET_CODE (ind) == POST_INC
12531 || GET_CODE (ind) == PRE_DEC
12532 || (wb
12533 && (GET_CODE (ind) == PRE_INC
12534 || GET_CODE (ind) == POST_DEC)))
12535 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12536
12537 if (wb
12538 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12539 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12540 && GET_CODE (XEXP (ind, 1)) == PLUS
12541 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12542 ind = XEXP (ind, 1);
12543
12544 /* Match:
12545 (plus (reg)
12546 (const)). */
12547 if (GET_CODE (ind) == PLUS
12548 && REG_P (XEXP (ind, 0))
12549 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12550 && CONST_INT_P (XEXP (ind, 1))
12551 && INTVAL (XEXP (ind, 1)) > -1024
12552 && INTVAL (XEXP (ind, 1)) < 1024
12553 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12554 return TRUE;
12555
12556 return FALSE;
12557 }
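
/* Some addresses arm_coproc_mem_operand accepts (illustrative only; register
numbers are arbitrary):

  (mem (reg r4))                           -- plain register
  (mem (post_inc (reg r4)))                -- limited writeback, any WB
  (mem (plus (reg r4) (const_int 1020)))   -- word-aligned, within +/-1020

An offset of 1024, or one that is not a multiple of 4, is rejected, as is any
address that mentions an eliminable register such as the frame pointer.  */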
12558
12559 /* Return TRUE if OP is a memory operand from/to which we can load or store
12560 a vector. TYPE is one of the following values:
12561 0 - Vector load/store (vldr)
12562 1 - Core registers (ldm)
12563 2 - Element/structure loads (vld1)
12564 */
12565 int
12566 neon_vector_mem_operand (rtx op, int type, bool strict)
12567 {
12568 rtx ind;
12569
12570 /* Reject eliminable registers. */
12571 if (strict && ! (reload_in_progress || reload_completed)
12572 && (reg_mentioned_p (frame_pointer_rtx, op)
12573 || reg_mentioned_p (arg_pointer_rtx, op)
12574 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12575 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12576 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12577 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12578 return FALSE;
12579
12580 /* Constants are converted into offsets from labels. */
12581 if (!MEM_P (op))
12582 return FALSE;
12583
12584 ind = XEXP (op, 0);
12585
12586 if (reload_completed
12587 && (GET_CODE (ind) == LABEL_REF
12588 || (GET_CODE (ind) == CONST
12589 && GET_CODE (XEXP (ind, 0)) == PLUS
12590 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12591 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12592 return TRUE;
12593
12594 /* Match: (mem (reg)). */
12595 if (REG_P (ind))
12596 return arm_address_register_rtx_p (ind, 0);
12597
12598 /* Allow post-increment with Neon registers. */
12599 if ((type != 1 && GET_CODE (ind) == POST_INC)
12600 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12601 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12602
12603 /* Allow post-increment by register for VLDn. */
12604 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12605 && GET_CODE (XEXP (ind, 1)) == PLUS
12606 && REG_P (XEXP (XEXP (ind, 1), 1)))
12607 return true;
12608
12609 /* Match:
12610 (plus (reg)
12611 (const)). */
12612 if (type == 0
12613 && GET_CODE (ind) == PLUS
12614 && REG_P (XEXP (ind, 0))
12615 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12616 && CONST_INT_P (XEXP (ind, 1))
12617 && INTVAL (XEXP (ind, 1)) > -1024
12618 /* For quad modes, we restrict the constant offset to be slightly less
12619 than what the instruction format permits. We have no such constraint
12620 on double mode offsets. (This must match arm_legitimate_index_p.) */
12621 && (INTVAL (XEXP (ind, 1))
12622 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12623 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12624 return TRUE;
12625
12626 return FALSE;
12627 }
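
/* For example (illustrative only), with TYPE == 0 a doubleword access such as
(mem:V2SI (plus (reg r0) (const_int 1020))) is accepted, but the same offset
is rejected for a quadword mode such as V4SImode, where the constant must be
below 1016 (so at most 1012); this mirrors the limit in
arm_legitimate_index_p.  */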
12628
12629 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12630 type. */
12631 int
12632 neon_struct_mem_operand (rtx op)
12633 {
12634 rtx ind;
12635
12636 /* Reject eliminable registers. */
12637 if (! (reload_in_progress || reload_completed)
12638 && ( reg_mentioned_p (frame_pointer_rtx, op)
12639 || reg_mentioned_p (arg_pointer_rtx, op)
12640 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12641 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12642 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12643 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12644 return FALSE;
12645
12646 /* Constants are converted into offsets from labels. */
12647 if (!MEM_P (op))
12648 return FALSE;
12649
12650 ind = XEXP (op, 0);
12651
12652 if (reload_completed
12653 && (GET_CODE (ind) == LABEL_REF
12654 || (GET_CODE (ind) == CONST
12655 && GET_CODE (XEXP (ind, 0)) == PLUS
12656 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12657 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12658 return TRUE;
12659
12660 /* Match: (mem (reg)). */
12661 if (REG_P (ind))
12662 return arm_address_register_rtx_p (ind, 0);
12663
12664 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12665 if (GET_CODE (ind) == POST_INC
12666 || GET_CODE (ind) == PRE_DEC)
12667 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12668
12669 return FALSE;
12670 }
12671
12672 /* Return true if X is a register that will be eliminated later on. */
12673 int
12674 arm_eliminable_register (rtx x)
12675 {
12676 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12677 || REGNO (x) == ARG_POINTER_REGNUM
12678 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12679 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12680 }
12681
12682 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12683 coprocessor registers; otherwise return NO_REGS. */
12684
12685 enum reg_class
12686 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12687 {
12688 if (mode == HFmode)
12689 {
12690 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12691 return GENERAL_REGS;
12692 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12693 return NO_REGS;
12694 return GENERAL_REGS;
12695 }
12696
12697 /* The neon move patterns handle all legitimate vector and struct
12698 addresses. */
12699 if (TARGET_NEON
12700 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12701 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12702 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12703 || VALID_NEON_STRUCT_MODE (mode)))
12704 return NO_REGS;
12705
12706 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12707 return NO_REGS;
12708
12709 return GENERAL_REGS;
12710 }
12711
12712 /* Values which must be returned in the most-significant end of the return
12713 register. */
12714
12715 static bool
12716 arm_return_in_msb (const_tree valtype)
12717 {
12718 return (TARGET_AAPCS_BASED
12719 && BYTES_BIG_ENDIAN
12720 && (AGGREGATE_TYPE_P (valtype)
12721 || TREE_CODE (valtype) == COMPLEX_TYPE
12722 || FIXED_POINT_TYPE_P (valtype)));
12723 }
12724
12725 /* Return TRUE if X references a SYMBOL_REF. */
12726 int
12727 symbol_mentioned_p (rtx x)
12728 {
12729 const char * fmt;
12730 int i;
12731
12732 if (GET_CODE (x) == SYMBOL_REF)
12733 return 1;
12734
12735 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12736 are constant offsets, not symbols. */
12737 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12738 return 0;
12739
12740 fmt = GET_RTX_FORMAT (GET_CODE (x));
12741
12742 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12743 {
12744 if (fmt[i] == 'E')
12745 {
12746 int j;
12747
12748 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12749 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12750 return 1;
12751 }
12752 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12753 return 1;
12754 }
12755
12756 return 0;
12757 }
12758
12759 /* Return TRUE if X references a LABEL_REF. */
12760 int
12761 label_mentioned_p (rtx x)
12762 {
12763 const char * fmt;
12764 int i;
12765
12766 if (GET_CODE (x) == LABEL_REF)
12767 return 1;
12768
12769 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12770 instruction, but they are constant offsets, not symbols. */
12771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12772 return 0;
12773
12774 fmt = GET_RTX_FORMAT (GET_CODE (x));
12775 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12776 {
12777 if (fmt[i] == 'E')
12778 {
12779 int j;
12780
12781 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12782 if (label_mentioned_p (XVECEXP (x, i, j)))
12783 return 1;
12784 }
12785 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12786 return 1;
12787 }
12788
12789 return 0;
12790 }
12791
12792 int
12793 tls_mentioned_p (rtx x)
12794 {
12795 switch (GET_CODE (x))
12796 {
12797 case CONST:
12798 return tls_mentioned_p (XEXP (x, 0));
12799
12800 case UNSPEC:
12801 if (XINT (x, 1) == UNSPEC_TLS)
12802 return 1;
12803
12804 /* Fall through. */
12805 default:
12806 return 0;
12807 }
12808 }
12809
12810 /* Must not copy any rtx that uses a pc-relative address.
12811 Also, disallow copying of load-exclusive instructions that
12812 may appear after splitting of compare-and-swap-style operations
12813 so as to prevent those loops from being transformed away from their
12814 canonical forms (see PR 69904). */
12815
12816 static bool
12817 arm_cannot_copy_insn_p (rtx_insn *insn)
12818 {
12819 /* The tls call insn cannot be copied, as it is paired with a data
12820 word. */
12821 if (recog_memoized (insn) == CODE_FOR_tlscall)
12822 return true;
12823
12824 subrtx_iterator::array_type array;
12825 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12826 {
12827 const_rtx x = *iter;
12828 if (GET_CODE (x) == UNSPEC
12829 && (XINT (x, 1) == UNSPEC_PIC_BASE
12830 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12831 return true;
12832 }
12833
12834 rtx set = single_set (insn);
12835 if (set)
12836 {
12837 rtx src = SET_SRC (set);
12838 if (GET_CODE (src) == ZERO_EXTEND)
12839 src = XEXP (src, 0);
12840
12841 /* Catch the load-exclusive and load-acquire operations. */
12842 if (GET_CODE (src) == UNSPEC_VOLATILE
12843 && (XINT (src, 1) == VUNSPEC_LL
12844 || XINT (src, 1) == VUNSPEC_LAX))
12845 return true;
12846 }
12847 return false;
12848 }
12849
12850 enum rtx_code
12851 minmax_code (rtx x)
12852 {
12853 enum rtx_code code = GET_CODE (x);
12854
12855 switch (code)
12856 {
12857 case SMAX:
12858 return GE;
12859 case SMIN:
12860 return LE;
12861 case UMIN:
12862 return LEU;
12863 case UMAX:
12864 return GEU;
12865 default:
12866 gcc_unreachable ();
12867 }
12868 }
12869
12870 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12871
12872 bool
12873 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12874 int *mask, bool *signed_sat)
12875 {
12876 /* The high bound must be a power of two minus one. */
12877 int log = exact_log2 (INTVAL (hi_bound) + 1);
12878 if (log == -1)
12879 return false;
12880
12881 /* The low bound is either zero (for usat) or one less than the
12882 negation of the high bound (for ssat). */
12883 if (INTVAL (lo_bound) == 0)
12884 {
12885 if (mask)
12886 *mask = log;
12887 if (signed_sat)
12888 *signed_sat = false;
12889
12890 return true;
12891 }
12892
12893 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12894 {
12895 if (mask)
12896 *mask = log + 1;
12897 if (signed_sat)
12898 *signed_sat = true;
12899
12900 return true;
12901 }
12902
12903 return false;
12904 }
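
/* Worked examples for arm_sat_operator_match (illustrative only):

  hi_bound = 255, lo_bound = 0     ->  *mask = 8, unsigned saturation
                                       (usat-style range 0 .. 255)
  hi_bound = 127, lo_bound = -128  ->  *mask = 8, signed saturation
                                       (ssat-style range -128 .. 127)

In the unsigned case exact_log2 (255 + 1) == 8 and the low bound is zero; in
the signed case exact_log2 (127 + 1) == 7 and -128 == -127 - 1, so the mask
becomes 7 + 1 == 8.  */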
12905
12906 /* Return 1 if memory locations are adjacent. */
12907 int
12908 adjacent_mem_locations (rtx a, rtx b)
12909 {
12910 /* We don't guarantee to preserve the order of these memory refs. */
12911 if (volatile_refs_p (a) || volatile_refs_p (b))
12912 return 0;
12913
12914 if ((REG_P (XEXP (a, 0))
12915 || (GET_CODE (XEXP (a, 0)) == PLUS
12916 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12917 && (REG_P (XEXP (b, 0))
12918 || (GET_CODE (XEXP (b, 0)) == PLUS
12919 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12920 {
12921 HOST_WIDE_INT val0 = 0, val1 = 0;
12922 rtx reg0, reg1;
12923 int val_diff;
12924
12925 if (GET_CODE (XEXP (a, 0)) == PLUS)
12926 {
12927 reg0 = XEXP (XEXP (a, 0), 0);
12928 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12929 }
12930 else
12931 reg0 = XEXP (a, 0);
12932
12933 if (GET_CODE (XEXP (b, 0)) == PLUS)
12934 {
12935 reg1 = XEXP (XEXP (b, 0), 0);
12936 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12937 }
12938 else
12939 reg1 = XEXP (b, 0);
12940
12941 /* Don't accept any offset that will require multiple
12942 instructions to handle, since this would cause the
12943 arith_adjacentmem pattern to output an overlong sequence. */
12944 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12945 return 0;
12946
12947 /* Don't allow an eliminable register: register elimination can make
12948 the offset too large. */
12949 if (arm_eliminable_register (reg0))
12950 return 0;
12951
12952 val_diff = val1 - val0;
12953
12954 if (arm_ld_sched)
12955 {
12956 /* If the target has load delay slots, then there's no benefit
12957 to using an ldm instruction unless the offset is zero and
12958 we are optimizing for size. */
12959 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12960 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12961 && (val_diff == 4 || val_diff == -4));
12962 }
12963
12964 return ((REGNO (reg0) == REGNO (reg1))
12965 && (val_diff == 4 || val_diff == -4));
12966 }
12967
12968 return 0;
12969 }
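
/* For example (illustrative only), (mem (plus (reg r4) (const_int 8))) and
(mem (plus (reg r4) (const_int 12))) are adjacent: same base register and an
offset difference of exactly 4.  On cores with load delay slots
(arm_ld_sched), that particular pair is nevertheless rejected, since there the
pair is only accepted when optimizing for size and one of the offsets is 0 or
4.  */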
12970
12971 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12972 for load operations, false for store operations. CONSECUTIVE is true
12973 if the register numbers in the operation must be consecutive in the register
12974 bank. RETURN_PC is true if the value is to be loaded into the PC.
12975 The pattern we are trying to match for load is:
12976 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12977 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12978 :
12979 :
12980 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12981 ]
12982 where
12983 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12984 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12985 3. If consecutive is TRUE, then for kth register being loaded,
12986 REGNO (R_dk) = REGNO (R_d0) + k.
12987 The pattern for store is similar. */
12988 bool
12989 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12990 bool consecutive, bool return_pc)
12991 {
12992 HOST_WIDE_INT count = XVECLEN (op, 0);
12993 rtx reg, mem, addr;
12994 unsigned regno;
12995 unsigned first_regno;
12996 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12997 rtx elt;
12998 bool addr_reg_in_reglist = false;
12999 bool update = false;
13000 int reg_increment;
13001 int offset_adj;
13002 int regs_per_val;
13003
13004 /* If not in SImode, then registers must be consecutive
13005 (e.g., VLDM instructions for DFmode). */
13006 gcc_assert ((mode == SImode) || consecutive);
13007 /* Setting return_pc for stores is illegal. */
13008 gcc_assert (!return_pc || load);
13009
13010 /* Set up the increments and the regs per val based on the mode. */
13011 reg_increment = GET_MODE_SIZE (mode);
13012 regs_per_val = reg_increment / 4;
13013 offset_adj = return_pc ? 1 : 0;
13014
13015 if (count <= 1
13016 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13017 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13018 return false;
13019
13020 /* Check if this is a write-back. */
13021 elt = XVECEXP (op, 0, offset_adj);
13022 if (GET_CODE (SET_SRC (elt)) == PLUS)
13023 {
13024 i++;
13025 base = 1;
13026 update = true;
13027
13028 /* The offset adjustment must be the number of registers being
13029 popped times the size of a single register. */
13030 if (!REG_P (SET_DEST (elt))
13031 || !REG_P (XEXP (SET_SRC (elt), 0))
13032 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13033 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13034 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13035 ((count - 1 - offset_adj) * reg_increment))
13036 return false;
13037 }
13038
13039 i = i + offset_adj;
13040 base = base + offset_adj;
13041 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13042 success depends on the type: VLDM can do just one reg,
13043 LDM must do at least two. */
13044 if ((count <= i) && (mode == SImode))
13045 return false;
13046
13047 elt = XVECEXP (op, 0, i - 1);
13048 if (GET_CODE (elt) != SET)
13049 return false;
13050
13051 if (load)
13052 {
13053 reg = SET_DEST (elt);
13054 mem = SET_SRC (elt);
13055 }
13056 else
13057 {
13058 reg = SET_SRC (elt);
13059 mem = SET_DEST (elt);
13060 }
13061
13062 if (!REG_P (reg) || !MEM_P (mem))
13063 return false;
13064
13065 regno = REGNO (reg);
13066 first_regno = regno;
13067 addr = XEXP (mem, 0);
13068 if (GET_CODE (addr) == PLUS)
13069 {
13070 if (!CONST_INT_P (XEXP (addr, 1)))
13071 return false;
13072
13073 offset = INTVAL (XEXP (addr, 1));
13074 addr = XEXP (addr, 0);
13075 }
13076
13077 if (!REG_P (addr))
13078 return false;
13079
13080 /* Don't allow SP to be loaded unless it is also the base register. It
13081 guarantees that SP is reset correctly when an LDM instruction
13082 is interrupted. Otherwise, we might end up with a corrupt stack. */
13083 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13084 return false;
13085
13086 for (; i < count; i++)
13087 {
13088 elt = XVECEXP (op, 0, i);
13089 if (GET_CODE (elt) != SET)
13090 return false;
13091
13092 if (load)
13093 {
13094 reg = SET_DEST (elt);
13095 mem = SET_SRC (elt);
13096 }
13097 else
13098 {
13099 reg = SET_SRC (elt);
13100 mem = SET_DEST (elt);
13101 }
13102
13103 if (!REG_P (reg)
13104 || GET_MODE (reg) != mode
13105 || REGNO (reg) <= regno
13106 || (consecutive
13107 && (REGNO (reg) !=
13108 (unsigned int) (first_regno + regs_per_val * (i - base))))
13109 /* Don't allow SP to be loaded unless it is also the base register. It
13110 guarantees that SP is reset correctly when an LDM instruction
13111 is interrupted. Otherwise, we might end up with a corrupt stack. */
13112 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13113 || !MEM_P (mem)
13114 || GET_MODE (mem) != mode
13115 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13116 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13117 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13118 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13119 offset + (i - base) * reg_increment))
13120 && (!REG_P (XEXP (mem, 0))
13121 || offset + (i - base) * reg_increment != 0)))
13122 return false;
13123
13124 regno = REGNO (reg);
13125 if (regno == REGNO (addr))
13126 addr_reg_in_reglist = true;
13127 }
13128
13129 if (load)
13130 {
13131 if (update && addr_reg_in_reglist)
13132 return false;
13133
13134 /* For Thumb-1, the address register is always modified, either by write-back
13135 or by an explicit load. If the pattern does not describe an update,
13136 then the address register must be in the list of loaded registers. */
13137 if (TARGET_THUMB1)
13138 return update || addr_reg_in_reglist;
13139 }
13140
13141 return true;
13142 }
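
/* As a concrete sketch (illustrative only; register numbers are arbitrary), a
two-register SImode LDM with write-back would be presented to this function
as

  (parallel
    [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
     (set (reg:SI r4) (mem:SI (reg:SI r0)))
     (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

i.e. the optional base-register update first, followed by loads from
ascending offsets into ascending register numbers.  */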
13143
13144 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13145 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13146 instruction. ADD_OFFSET is nonzero if the base address register needs
13147 to be modified with an add instruction before we can use it. */
13148
13149 static bool
13150 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13151 int nops, HOST_WIDE_INT add_offset)
13152 {
13153 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13154 if the offset isn't small enough. The reason 2 ldrs are faster
13155 is because these ARMs are able to do more than one cache access
13156 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13157 whilst the ARM8 has a double bandwidth cache. This means that
13158 these cores can do both an instruction fetch and a data fetch in
13159 a single cycle, so the trick of calculating the address into a
13160 scratch register (one of the result regs) and then doing a load
13161 multiple actually becomes slower (and no smaller in code size).
13162 That is the transformation
13163
13164 ldr rd1, [rbase + offset]
13165 ldr rd2, [rbase + offset + 4]
13166
13167 to
13168
13169 add rd1, rbase, offset
13170 ldmia rd1, {rd1, rd2}
13171
13172 produces worse code -- '3 cycles + any stalls on rd2' instead of
13173 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13174 access per cycle, the first sequence could never complete in less
13175 than 6 cycles, whereas the ldm sequence would only take 5 and
13176 would make better use of sequential accesses if not hitting the
13177 cache.
13178
13179 We cheat here and test 'arm_ld_sched' which we currently know to
13180 only be true for the ARM8, ARM9 and StrongARM. If this ever
13181 changes, then the test below needs to be reworked. */
13182 if (nops == 2 && arm_ld_sched && add_offset != 0)
13183 return false;
13184
13185 /* XScale has load-store double instructions, but they have stricter
13186 alignment requirements than load-store multiple, so we cannot
13187 use them.
13188
13189 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13190 the pipeline until completion.
13191
13192 NREGS CYCLES
13193 1 3
13194 2 4
13195 3 5
13196 4 6
13197
13198 An ldr instruction takes 1-3 cycles, but does not block the
13199 pipeline.
13200
13201 NREGS CYCLES
13202 1 1-3
13203 2 2-6
13204 3 3-9
13205 4 4-12
13206
13207 Best case ldr will always win. However, the more ldr instructions
13208 we issue, the less likely we are to be able to schedule them well.
13209 Using ldr instructions also increases code size.
13210
13211 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13212 for counts of 3 or 4 regs. */
13213 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13214 return false;
13215 return true;
13216 }
13217
13218 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13219 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13220 an array ORDER describing the sequence in which to access the offsets
13221 so that they are visited in ascending order. In this sequence, each
13222 offset must be larger by exactly 4 than the previous one. ORDER[0]
13223 must have been filled in with the lowest offset by the caller.
13224 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13225 we use to verify that ORDER produces an ascending order of registers.
13226 Return true if it was possible to construct such an order, false if
13227 not. */
13228
13229 static bool
13230 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13231 int *unsorted_regs)
13232 {
13233 int i;
13234 for (i = 1; i < nops; i++)
13235 {
13236 int j;
13237
13238 order[i] = order[i - 1];
13239 for (j = 0; j < nops; j++)
13240 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13241 {
13242 /* We must find exactly one offset that is higher than the
13243 previous one by 4. */
13244 if (order[i] != order[i - 1])
13245 return false;
13246 order[i] = j;
13247 }
13248 if (order[i] == order[i - 1])
13249 return false;
13250 /* The register numbers must be ascending. */
13251 if (unsorted_regs != NULL
13252 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13253 return false;
13254 }
13255 return true;
13256 }
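
/* A short worked example (illustrative only): for UNSORTED_OFFSETS
{ 8, 4, 12, 0 } the caller seeds ORDER[0] = 3 (the slot holding offset 0) and
the loop fills in ORDER = { 3, 1, 0, 2 }, i.e. offsets 0, 4, 8, 12.  Offset
sets such as { 0, 4, 4, 8 } or { 0, 4, 12, 16 } make the function return
false, because each step must find exactly one offset that is larger than the
previous one by 4.  */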
13257
13258 /* Used to determine in a peephole whether a sequence of load
13259 instructions can be changed into a load-multiple instruction.
13260 NOPS is the number of separate load instructions we are examining. The
13261 first NOPS entries in OPERANDS are the destination registers, the
13262 next NOPS entries are memory operands. If this function is
13263 successful, *BASE is set to the common base register of the memory
13264 accesses; *LOAD_OFFSET is set to the first memory location's offset
13265 from that base register.
13266 REGS is an array filled in with the destination register numbers.
13267 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13268 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13269 the sequence of registers in REGS matches the loads from ascending memory
13270 locations, and the function verifies that the register numbers are
13271 themselves ascending. If CHECK_REGS is false, the register numbers
13272 are stored in the order they are found in the operands. */
13273 static int
13274 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13275 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13276 {
13277 int unsorted_regs[MAX_LDM_STM_OPS];
13278 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13279 int order[MAX_LDM_STM_OPS];
13280 rtx base_reg_rtx = NULL;
13281 int base_reg = -1;
13282 int i, ldm_case;
13283
13284 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13285 easily extended if required. */
13286 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13287
13288 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13289
13290 /* Loop over the operands and check that the memory references are
13291 suitable (i.e. immediate offsets from the same base register). At
13292 the same time, extract the target register and the memory
13293 offsets. */
13294 for (i = 0; i < nops; i++)
13295 {
13296 rtx reg;
13297 rtx offset;
13298
13299 /* Convert a subreg of a mem into the mem itself. */
13300 if (GET_CODE (operands[nops + i]) == SUBREG)
13301 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13302
13303 gcc_assert (MEM_P (operands[nops + i]));
13304
13305 /* Don't reorder volatile memory references; it doesn't seem worth
13306 looking for the case where the order is ok anyway. */
13307 if (MEM_VOLATILE_P (operands[nops + i]))
13308 return 0;
13309
13310 offset = const0_rtx;
13311
13312 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13313 || (GET_CODE (reg) == SUBREG
13314 && REG_P (reg = SUBREG_REG (reg))))
13315 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13316 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13317 || (GET_CODE (reg) == SUBREG
13318 && REG_P (reg = SUBREG_REG (reg))))
13319 && (CONST_INT_P (offset
13320 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13321 {
13322 if (i == 0)
13323 {
13324 base_reg = REGNO (reg);
13325 base_reg_rtx = reg;
13326 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13327 return 0;
13328 }
13329 else if (base_reg != (int) REGNO (reg))
13330 /* Not addressed from the same base register. */
13331 return 0;
13332
13333 unsorted_regs[i] = (REG_P (operands[i])
13334 ? REGNO (operands[i])
13335 : REGNO (SUBREG_REG (operands[i])));
13336
13337 /* If it isn't an integer register, or if it overwrites the
13338 base register but isn't the last insn in the list, then
13339 we can't do this. */
13340 if (unsorted_regs[i] < 0
13341 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13342 || unsorted_regs[i] > 14
13343 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13344 return 0;
13345
13346 /* Don't allow SP to be loaded unless it is also the base
13347 register. It guarantees that SP is reset correctly when
13348 an LDM instruction is interrupted. Otherwise, we might
13349 end up with a corrupt stack. */
13350 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13351 return 0;
13352
13353 unsorted_offsets[i] = INTVAL (offset);
13354 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13355 order[0] = i;
13356 }
13357 else
13358 /* Not a suitable memory address. */
13359 return 0;
13360 }
13361
13362 /* All the useful information has now been extracted from the
13363 operands into unsorted_regs and unsorted_offsets; additionally,
13364 order[0] has been set to the lowest offset in the list. Sort
13365 the offsets into order, verifying that they are adjacent, and
13366 check that the register numbers are ascending. */
13367 if (!compute_offset_order (nops, unsorted_offsets, order,
13368 check_regs ? unsorted_regs : NULL))
13369 return 0;
13370
13371 if (saved_order)
13372 memcpy (saved_order, order, sizeof order);
13373
13374 if (base)
13375 {
13376 *base = base_reg;
13377
13378 for (i = 0; i < nops; i++)
13379 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13380
13381 *load_offset = unsorted_offsets[order[0]];
13382 }
13383
13384 if (TARGET_THUMB1
13385 && !peep2_reg_dead_p (nops, base_reg_rtx))
13386 return 0;
13387
13388 if (unsorted_offsets[order[0]] == 0)
13389 ldm_case = 1; /* ldmia */
13390 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13391 ldm_case = 2; /* ldmib */
13392 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13393 ldm_case = 3; /* ldmda */
13394 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13395 ldm_case = 4; /* ldmdb */
13396 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13397 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13398 ldm_case = 5;
13399 else
13400 return 0;
13401
13402 if (!multiple_operation_profitable_p (false, nops,
13403 ldm_case == 5
13404 ? unsorted_offsets[order[0]] : 0))
13405 return 0;
13406
13407 return ldm_case;
13408 }
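
/* For instance (illustrative only; register numbers are arbitrary), the
peephole sequence

  ldr r1, [r4]
  ldr r2, [r4, #4]
  ldr r3, [r4, #8]

yields ldm_case 1 (an ldmia from r4).  The same loads based at offset 4 would
give ldm_case 2 (ldmib, ARM state only), and a lowest offset that is merely an
addable constant gives ldm_case 5, which requires an add into a scratch or
result register before the ldm can be used.  */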
13409
13410 /* Used to determine in a peephole whether a sequence of store instructions can
13411 be changed into a store-multiple instruction.
13412 NOPS is the number of separate store instructions we are examining.
13413 NOPS_TOTAL is the total number of instructions recognized by the peephole
13414 pattern.
13415 The first NOPS entries in OPERANDS are the source registers, the next
13416 NOPS entries are memory operands. If this function is successful, *BASE is
13417 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13418 to the first memory location's offset from that base register. REGS is an
13419 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13420 likewise filled with the corresponding rtx's.
13421 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13422 numbers to an ascending order of stores.
13423 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13424 from ascending memory locations, and the function verifies that the register
13425 numbers are themselves ascending. If CHECK_REGS is false, the register
13426 numbers are stored in the order they are found in the operands. */
13427 static int
13428 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13429 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13430 HOST_WIDE_INT *load_offset, bool check_regs)
13431 {
13432 int unsorted_regs[MAX_LDM_STM_OPS];
13433 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13434 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13435 int order[MAX_LDM_STM_OPS];
13436 int base_reg = -1;
13437 rtx base_reg_rtx = NULL;
13438 int i, stm_case;
13439
13440 /* Write back of base register is currently only supported for Thumb 1. */
13441 int base_writeback = TARGET_THUMB1;
13442
13443 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13444 easily extended if required. */
13445 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13446
13447 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13448
13449 /* Loop over the operands and check that the memory references are
13450 suitable (i.e. immediate offsets from the same base register). At
13451 the same time, extract the source register and the memory
13452 offsets. */
13453 for (i = 0; i < nops; i++)
13454 {
13455 rtx reg;
13456 rtx offset;
13457
13458 /* Convert a subreg of a mem into the mem itself. */
13459 if (GET_CODE (operands[nops + i]) == SUBREG)
13460 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13461
13462 gcc_assert (MEM_P (operands[nops + i]));
13463
13464 /* Don't reorder volatile memory references; it doesn't seem worth
13465 looking for the case where the order is ok anyway. */
13466 if (MEM_VOLATILE_P (operands[nops + i]))
13467 return 0;
13468
13469 offset = const0_rtx;
13470
13471 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13472 || (GET_CODE (reg) == SUBREG
13473 && REG_P (reg = SUBREG_REG (reg))))
13474 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13475 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13476 || (GET_CODE (reg) == SUBREG
13477 && REG_P (reg = SUBREG_REG (reg))))
13478 && (CONST_INT_P (offset
13479 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13480 {
13481 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13482 ? operands[i] : SUBREG_REG (operands[i]));
13483 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13484
13485 if (i == 0)
13486 {
13487 base_reg = REGNO (reg);
13488 base_reg_rtx = reg;
13489 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13490 return 0;
13491 }
13492 else if (base_reg != (int) REGNO (reg))
13493 /* Not addressed from the same base register. */
13494 return 0;
13495
13496 /* If it isn't an integer register, then we can't do this. */
13497 if (unsorted_regs[i] < 0
13498 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13499 /* The effects are unpredictable if the base register is
13500 both updated and stored. */
13501 || (base_writeback && unsorted_regs[i] == base_reg)
13502 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13503 || unsorted_regs[i] > 14)
13504 return 0;
13505
13506 unsorted_offsets[i] = INTVAL (offset);
13507 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13508 order[0] = i;
13509 }
13510 else
13511 /* Not a suitable memory address. */
13512 return 0;
13513 }
13514
13515 /* All the useful information has now been extracted from the
13516 operands into unsorted_regs and unsorted_offsets; additionally,
13517 order[0] has been set to the lowest offset in the list. Sort
13518 the offsets into order, verifying that they are adjacent, and
13519 check that the register numbers are ascending. */
13520 if (!compute_offset_order (nops, unsorted_offsets, order,
13521 check_regs ? unsorted_regs : NULL))
13522 return 0;
13523
13524 if (saved_order)
13525 memcpy (saved_order, order, sizeof order);
13526
13527 if (base)
13528 {
13529 *base = base_reg;
13530
13531 for (i = 0; i < nops; i++)
13532 {
13533 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13534 if (reg_rtxs)
13535 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13536 }
13537
13538 *load_offset = unsorted_offsets[order[0]];
13539 }
13540
13541 if (TARGET_THUMB1
13542 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13543 return 0;
13544
13545 if (unsorted_offsets[order[0]] == 0)
13546 stm_case = 1; /* stmia */
13547 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13548 stm_case = 2; /* stmib */
13549 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13550 stm_case = 3; /* stmda */
13551 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13552 stm_case = 4; /* stmdb */
13553 else
13554 return 0;
13555
13556 if (!multiple_operation_profitable_p (false, nops, 0))
13557 return 0;
13558
13559 return stm_case;
13560 }
13561 \f
13562 /* Routines for use in generating RTL. */
13563
13564 /* Generate a load-multiple instruction. COUNT is the number of loads in
13565 the instruction; REGS and MEMS are arrays containing the operands.
13566 BASEREG is the base register to be used in addressing the memory operands.
13567 WBACK_OFFSET is nonzero if the instruction should update the base
13568 register. */
13569
13570 static rtx
13571 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13572 HOST_WIDE_INT wback_offset)
13573 {
13574 int i = 0, j;
13575 rtx result;
13576
13577 if (!multiple_operation_profitable_p (false, count, 0))
13578 {
13579 rtx seq;
13580
13581 start_sequence ();
13582
13583 for (i = 0; i < count; i++)
13584 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13585
13586 if (wback_offset != 0)
13587 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13588
13589 seq = get_insns ();
13590 end_sequence ();
13591
13592 return seq;
13593 }
13594
13595 result = gen_rtx_PARALLEL (VOIDmode,
13596 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13597 if (wback_offset != 0)
13598 {
13599 XVECEXP (result, 0, 0)
13600 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13601 i = 1;
13602 count++;
13603 }
13604
13605 for (j = 0; i < count; i++, j++)
13606 XVECEXP (result, 0, i)
13607 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13608
13609 return result;
13610 }
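
/* Sketch of the two possible outputs (illustrative only): for COUNT == 3,
REGS == { 4, 5, 6 } and WBACK_OFFSET == 12, the profitable case returns

  (parallel
    [(set basereg (plus basereg (const_int 12)))
     (set (reg:SI 4) MEMS[0])
     (set (reg:SI 5) MEMS[1])
     (set (reg:SI 6) MEMS[2])])

whereas the unprofitable case instead returns an ordinary insn sequence of
three separate moves followed by the base-register update.  */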
13611
13612 /* Generate a store-multiple instruction. COUNT is the number of stores in
13613 the instruction; REGS and MEMS are arrays containing the operands.
13614 BASEREG is the base register to be used in addressing the memory operands.
13615 WBACK_OFFSET is nonzero if the instruction should update the base
13616 register. */
13617
13618 static rtx
13619 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13620 HOST_WIDE_INT wback_offset)
13621 {
13622 int i = 0, j;
13623 rtx result;
13624
13625 if (GET_CODE (basereg) == PLUS)
13626 basereg = XEXP (basereg, 0);
13627
13628 if (!multiple_operation_profitable_p (false, count, 0))
13629 {
13630 rtx seq;
13631
13632 start_sequence ();
13633
13634 for (i = 0; i < count; i++)
13635 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13636
13637 if (wback_offset != 0)
13638 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13639
13640 seq = get_insns ();
13641 end_sequence ();
13642
13643 return seq;
13644 }
13645
13646 result = gen_rtx_PARALLEL (VOIDmode,
13647 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13648 if (wback_offset != 0)
13649 {
13650 XVECEXP (result, 0, 0)
13651 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13652 i = 1;
13653 count++;
13654 }
13655
13656 for (j = 0; i < count; i++, j++)
13657 XVECEXP (result, 0, i)
13658 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13659
13660 return result;
13661 }
13662
13663 /* Generate either a load-multiple or a store-multiple instruction. This
13664 function can be used in situations where we can start with a single MEM
13665 rtx and adjust its address upwards.
13666 COUNT is the number of operations in the instruction, not counting a
13667 possible update of the base register. REGS is an array containing the
13668 register operands.
13669 BASEREG is the base register to be used in addressing the memory operands,
13670 which are constructed from BASEMEM.
13671 WRITE_BACK specifies whether the generated instruction should include an
13672 update of the base register.
13673 OFFSETP is used to pass an offset to and from this function; this offset
13674 is not used when constructing the address (instead BASEMEM should have an
13675 appropriate offset in its address), it is used only for setting
13676 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13677
13678 static rtx
13679 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13680 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13681 {
13682 rtx mems[MAX_LDM_STM_OPS];
13683 HOST_WIDE_INT offset = *offsetp;
13684 int i;
13685
13686 gcc_assert (count <= MAX_LDM_STM_OPS);
13687
13688 if (GET_CODE (basereg) == PLUS)
13689 basereg = XEXP (basereg, 0);
13690
13691 for (i = 0; i < count; i++)
13692 {
13693 rtx addr = plus_constant (Pmode, basereg, i * 4);
13694 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13695 offset += 4;
13696 }
13697
13698 if (write_back)
13699 *offsetp = offset;
13700
13701 if (is_load)
13702 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13703 write_back ? 4 * count : 0);
13704 else
13705 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13706 write_back ? 4 * count : 0);
13707 }
13708
13709 rtx
13710 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13711 rtx basemem, HOST_WIDE_INT *offsetp)
13712 {
13713 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13714 offsetp);
13715 }
13716
13717 rtx
13718 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13719 rtx basemem, HOST_WIDE_INT *offsetp)
13720 {
13721 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13722 offsetp);
13723 }
13724
13725 /* Called from a peephole2 expander to turn a sequence of loads into an
13726 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13727 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13728 is true if we can reorder the registers because they are used commutatively
13729 subsequently.
13730 Returns true iff we could generate a new instruction. */
13731
13732 bool
13733 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13734 {
13735 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13736 rtx mems[MAX_LDM_STM_OPS];
13737 int i, j, base_reg;
13738 rtx base_reg_rtx;
13739 HOST_WIDE_INT offset;
13740 int write_back = FALSE;
13741 int ldm_case;
13742 rtx addr;
13743
13744 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13745 &base_reg, &offset, !sort_regs);
13746
13747 if (ldm_case == 0)
13748 return false;
13749
13750 if (sort_regs)
13751 for (i = 0; i < nops - 1; i++)
13752 for (j = i + 1; j < nops; j++)
13753 if (regs[i] > regs[j])
13754 {
13755 int t = regs[i];
13756 regs[i] = regs[j];
13757 regs[j] = t;
13758 }
13759 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13760
13761 if (TARGET_THUMB1)
13762 {
13763 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13764 gcc_assert (ldm_case == 1 || ldm_case == 5);
13765 write_back = TRUE;
13766 }
13767
13768 if (ldm_case == 5)
13769 {
13770 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13771 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13772 offset = 0;
13773 if (!TARGET_THUMB1)
13774 base_reg_rtx = newbase;
13775 }
13776
13777 for (i = 0; i < nops; i++)
13778 {
13779 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13780 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13781 SImode, addr, 0);
13782 }
13783 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13784 write_back ? offset + i * 4 : 0));
13785 return true;
13786 }
13787
13788 /* Called from a peephole2 expander to turn a sequence of stores into an
13789 STM instruction. OPERANDS are the operands found by the peephole matcher;
13790 NOPS indicates how many separate stores we are trying to combine.
13791 Returns true iff we could generate a new instruction. */
13792
13793 bool
13794 gen_stm_seq (rtx *operands, int nops)
13795 {
13796 int i;
13797 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13798 rtx mems[MAX_LDM_STM_OPS];
13799 int base_reg;
13800 rtx base_reg_rtx;
13801 HOST_WIDE_INT offset;
13802 int write_back = FALSE;
13803 int stm_case;
13804 rtx addr;
13805 bool base_reg_dies;
13806
13807 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13808 mem_order, &base_reg, &offset, true);
13809
13810 if (stm_case == 0)
13811 return false;
13812
13813 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13814
13815 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13816 if (TARGET_THUMB1)
13817 {
13818 gcc_assert (base_reg_dies);
13819 write_back = TRUE;
13820 }
13821
13822 if (stm_case == 5)
13823 {
13824 gcc_assert (base_reg_dies);
13825 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13826 offset = 0;
13827 }
13828
13829 addr = plus_constant (Pmode, base_reg_rtx, offset);
13830
13831 for (i = 0; i < nops; i++)
13832 {
13833 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13834 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13835 SImode, addr, 0);
13836 }
13837 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13838 write_back ? offset + i * 4 : 0));
13839 return true;
13840 }
13841
13842 /* Called from a peephole2 expander to turn a sequence of stores that are
13843 preceded by constant loads into an STM instruction. OPERANDS are the
13844 operands found by the peephole matcher; NOPS indicates how many
13845 separate stores we are trying to combine; there are 2 * NOPS
13846 instructions in the peephole.
13847 Returns true iff we could generate a new instruction. */
13848
13849 bool
13850 gen_const_stm_seq (rtx *operands, int nops)
13851 {
13852 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13853 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13854 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13855 rtx mems[MAX_LDM_STM_OPS];
13856 int base_reg;
13857 rtx base_reg_rtx;
13858 HOST_WIDE_INT offset;
13859 int write_back = FALSE;
13860 int stm_case;
13861 rtx addr;
13862 bool base_reg_dies;
13863 int i, j;
13864 HARD_REG_SET allocated;
13865
13866 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13867 mem_order, &base_reg, &offset, false);
13868
13869 if (stm_case == 0)
13870 return false;
13871
13872 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13873
13874 /* If the same register is used more than once, try to find a free
13875 register. */
13876 CLEAR_HARD_REG_SET (allocated);
13877 for (i = 0; i < nops; i++)
13878 {
13879 for (j = i + 1; j < nops; j++)
13880 if (regs[i] == regs[j])
13881 {
13882 rtx t = peep2_find_free_register (0, nops * 2,
13883 TARGET_THUMB1 ? "l" : "r",
13884 SImode, &allocated);
13885 if (t == NULL_RTX)
13886 return false;
13887 reg_rtxs[i] = t;
13888 regs[i] = REGNO (t);
13889 }
13890 }
13891
13892 /* Compute an ordering that maps the register numbers to an ascending
13893 sequence. */
13894 reg_order[0] = 0;
13895 for (i = 0; i < nops; i++)
13896 if (regs[i] < regs[reg_order[0]])
13897 reg_order[0] = i;
13898
13899 for (i = 1; i < nops; i++)
13900 {
13901 int this_order = reg_order[i - 1];
13902 for (j = 0; j < nops; j++)
13903 if (regs[j] > regs[reg_order[i - 1]]
13904 && (this_order == reg_order[i - 1]
13905 || regs[j] < regs[this_order]))
13906 this_order = j;
13907 reg_order[i] = this_order;
13908 }
13909
13910 /* Ensure that registers that must be live after the instruction end
13911 up with the correct value. */
13912 for (i = 0; i < nops; i++)
13913 {
13914 int this_order = reg_order[i];
13915 if ((this_order != mem_order[i]
13916 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13917 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13918 return false;
13919 }
13920
13921 /* Load the constants. */
13922 for (i = 0; i < nops; i++)
13923 {
13924 rtx op = operands[2 * nops + mem_order[i]];
13925 sorted_regs[i] = regs[reg_order[i]];
13926 emit_move_insn (reg_rtxs[reg_order[i]], op);
13927 }
13928
13929 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13930
13931 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13932 if (TARGET_THUMB1)
13933 {
13934 gcc_assert (base_reg_dies);
13935 write_back = TRUE;
13936 }
13937
13938 if (stm_case == 5)
13939 {
13940 gcc_assert (base_reg_dies);
13941 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13942 offset = 0;
13943 }
13944
13945 addr = plus_constant (Pmode, base_reg_rtx, offset);
13946
13947 for (i = 0; i < nops; i++)
13948 {
13949 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13950 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13951 SImode, addr, 0);
13952 }
13953 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13954 write_back ? offset + i * 4 : 0));
13955 return true;
13956 }
13957
13958 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13959 unaligned copies on processors which support unaligned semantics for those
13960 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13961 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13962 An interleave factor of 1 (the minimum) will perform no interleaving.
13963 Load/store multiple are used for aligned addresses where possible. */
13964
13965 static void
13966 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13967 HOST_WIDE_INT length,
13968 unsigned int interleave_factor)
13969 {
13970 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13971 int *regnos = XALLOCAVEC (int, interleave_factor);
13972 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13973 HOST_WIDE_INT i, j;
13974 HOST_WIDE_INT remaining = length, words;
13975 rtx halfword_tmp = NULL, byte_tmp = NULL;
13976 rtx dst, src;
13977 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13978 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13979 HOST_WIDE_INT srcoffset, dstoffset;
13980 HOST_WIDE_INT src_autoinc, dst_autoinc;
13981 rtx mem, addr;
13982
13983 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13984
13985 /* Use hard registers if we have aligned source or destination so we can use
13986 load/store multiple with contiguous registers. */
13987 if (dst_aligned || src_aligned)
13988 for (i = 0; i < interleave_factor; i++)
13989 regs[i] = gen_rtx_REG (SImode, i);
13990 else
13991 for (i = 0; i < interleave_factor; i++)
13992 regs[i] = gen_reg_rtx (SImode);
13993
13994 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13995 src = copy_addr_to_reg (XEXP (srcbase, 0));
13996
13997 srcoffset = dstoffset = 0;
13998
13999 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14000 For copying the last bytes we want to subtract this offset again. */
14001 src_autoinc = dst_autoinc = 0;
14002
14003 for (i = 0; i < interleave_factor; i++)
14004 regnos[i] = i;
14005
14006 /* Copy BLOCK_SIZE_BYTES chunks. */
14007
14008 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14009 {
14010 /* Load words. */
14011 if (src_aligned && interleave_factor > 1)
14012 {
14013 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14014 TRUE, srcbase, &srcoffset));
14015 src_autoinc += UNITS_PER_WORD * interleave_factor;
14016 }
14017 else
14018 {
14019 for (j = 0; j < interleave_factor; j++)
14020 {
14021 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14022 - src_autoinc));
14023 mem = adjust_automodify_address (srcbase, SImode, addr,
14024 srcoffset + j * UNITS_PER_WORD);
14025 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14026 }
14027 srcoffset += block_size_bytes;
14028 }
14029
14030 /* Store words. */
14031 if (dst_aligned && interleave_factor > 1)
14032 {
14033 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14034 TRUE, dstbase, &dstoffset));
14035 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14036 }
14037 else
14038 {
14039 for (j = 0; j < interleave_factor; j++)
14040 {
14041 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14042 - dst_autoinc));
14043 mem = adjust_automodify_address (dstbase, SImode, addr,
14044 dstoffset + j * UNITS_PER_WORD);
14045 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14046 }
14047 dstoffset += block_size_bytes;
14048 }
14049
14050 remaining -= block_size_bytes;
14051 }
14052
14053 /* Copy any whole words left (note these aren't interleaved with any
14054 subsequent halfword/byte load/stores in the interests of simplicity). */
14055
14056 words = remaining / UNITS_PER_WORD;
14057
14058 gcc_assert (words < interleave_factor);
14059
14060 if (src_aligned && words > 1)
14061 {
14062 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14063 &srcoffset));
14064 src_autoinc += UNITS_PER_WORD * words;
14065 }
14066 else
14067 {
14068 for (j = 0; j < words; j++)
14069 {
14070 addr = plus_constant (Pmode, src,
14071 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14072 mem = adjust_automodify_address (srcbase, SImode, addr,
14073 srcoffset + j * UNITS_PER_WORD);
14074 if (src_aligned)
14075 emit_move_insn (regs[j], mem);
14076 else
14077 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14078 }
14079 srcoffset += words * UNITS_PER_WORD;
14080 }
14081
14082 if (dst_aligned && words > 1)
14083 {
14084 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14085 &dstoffset));
14086 dst_autoinc += words * UNITS_PER_WORD;
14087 }
14088 else
14089 {
14090 for (j = 0; j < words; j++)
14091 {
14092 addr = plus_constant (Pmode, dst,
14093 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14094 mem = adjust_automodify_address (dstbase, SImode, addr,
14095 dstoffset + j * UNITS_PER_WORD);
14096 if (dst_aligned)
14097 emit_move_insn (mem, regs[j]);
14098 else
14099 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14100 }
14101 dstoffset += words * UNITS_PER_WORD;
14102 }
14103
14104 remaining -= words * UNITS_PER_WORD;
14105
14106 gcc_assert (remaining < 4);
14107
14108 /* Copy a halfword if necessary. */
14109
14110 if (remaining >= 2)
14111 {
14112 halfword_tmp = gen_reg_rtx (SImode);
14113
14114 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14115 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14116 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14117
14118 /* Either write out immediately, or delay until we've loaded the last
14119 byte, depending on interleave factor. */
14120 if (interleave_factor == 1)
14121 {
14122 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14123 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14124 emit_insn (gen_unaligned_storehi (mem,
14125 gen_lowpart (HImode, halfword_tmp)));
14126 halfword_tmp = NULL;
14127 dstoffset += 2;
14128 }
14129
14130 remaining -= 2;
14131 srcoffset += 2;
14132 }
14133
14134 gcc_assert (remaining < 2);
14135
14136 /* Copy last byte. */
14137
14138 if ((remaining & 1) != 0)
14139 {
14140 byte_tmp = gen_reg_rtx (SImode);
14141
14142 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14143 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14144 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14145
14146 if (interleave_factor == 1)
14147 {
14148 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14149 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14150 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14151 byte_tmp = NULL;
14152 dstoffset++;
14153 }
14154
14155 remaining--;
14156 srcoffset++;
14157 }
14158
14159 /* Store last halfword if we haven't done so already. */
14160
14161 if (halfword_tmp)
14162 {
14163 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14164 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14165 emit_insn (gen_unaligned_storehi (mem,
14166 gen_lowpart (HImode, halfword_tmp)));
14167 dstoffset += 2;
14168 }
14169
14170 /* Likewise for last byte. */
14171
14172 if (byte_tmp)
14173 {
14174 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14175 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14176 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14177 dstoffset++;
14178 }
14179
14180 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14181 }
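
/* A worked decomposition (illustrative only): LENGTH == 11 with
INTERLEAVE_FACTOR == 2 copies one 8-byte block (two word loads followed by two
word stores), leaves no further whole word (remaining == 3), then loads a
halfword and a byte; because the interleave factor is greater than 1 their
stores are deferred to the end, finishing with remaining == 0 and
srcoffset == dstoffset == 11.  */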
14182
14183 /* From mips_adjust_block_mem:
14184
14185 Helper function for doing a loop-based block operation on memory
14186 reference MEM. Each iteration of the loop will operate on LENGTH
14187 bytes of MEM.
14188
14189 Create a new base register for use within the loop and point it to
14190 the start of MEM. Create a new memory reference that uses this
14191 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14192
14193 static void
14194 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14195 rtx *loop_mem)
14196 {
14197 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14198
14199 /* Although the new mem does not refer to a known location,
14200 it does keep up to LENGTH bytes of alignment. */
14201 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14202 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14203 }
14204
14205 /* From mips_block_move_loop:
14206
14207 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14208 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14209 the memory regions do not overlap. */
14210
14211 static void
14212 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14213 unsigned int interleave_factor,
14214 HOST_WIDE_INT bytes_per_iter)
14215 {
14216 rtx src_reg, dest_reg, final_src, test;
14217 HOST_WIDE_INT leftover;
14218
14219 leftover = length % bytes_per_iter;
14220 length -= leftover;
14221
14222 /* Create registers and memory references for use within the loop. */
14223 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14224 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14225
14226 /* Calculate the value that SRC_REG should have after the last iteration of
14227 the loop. */
14228 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14229 0, 0, OPTAB_WIDEN);
14230
14231 /* Emit the start of the loop. */
14232 rtx_code_label *label = gen_label_rtx ();
14233 emit_label (label);
14234
14235 /* Emit the loop body. */
14236 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14237 interleave_factor);
14238
14239 /* Move on to the next block. */
14240 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14241 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14242
14243 /* Emit the loop condition. */
14244 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14245 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14246
14247 /* Mop up any left-over bytes. */
14248 if (leftover)
14249 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14250 }
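/* A rough worked example: LENGTH == 37 with BYTES_PER_ITER == 16 gives
   LEFTOVER == 5, so the loop above copies 32 bytes in two iterations and the
   final call to arm_block_move_unaligned_straight mops up the last 5.  */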
14251
14252 /* Emit a block move when either the source or destination is unaligned (not
14253 aligned to a four-byte boundary). This may need further tuning depending on
14254 core type, optimize_size setting, etc. */
14255
14256 static int
14257 arm_movmemqi_unaligned (rtx *operands)
14258 {
14259 HOST_WIDE_INT length = INTVAL (operands[2]);
14260
14261 if (optimize_size)
14262 {
14263 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14264 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14265 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14266 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14267 or dst_aligned though: allow more interleaving in those cases since the
14268 resulting code can be smaller. */
14269 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14270 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14271
14272 if (length > 12)
14273 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14274 interleave_factor, bytes_per_iter);
14275 else
14276 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14277 interleave_factor);
14278 }
14279 else
14280 {
14281 /* Note that the loop created by arm_block_move_unaligned_loop may be
14282 subject to loop unrolling, which makes tuning this condition a little
14283 redundant. */
14284 if (length > 32)
14285 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14286 else
14287 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14288 }
14289
14290 return 1;
14291 }
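/* So, for instance, a 40-byte copy compiled without -Os takes the loop path
   (40 > 32) with interleave factor 4 and 16 bytes per iteration, whereas the
   same copy under -Os with neither operand word-aligned uses interleave
   factor 1 and only 4 bytes per iteration.  */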
14292
14293 int
14294 arm_gen_movmemqi (rtx *operands)
14295 {
14296 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14297 HOST_WIDE_INT srcoffset, dstoffset;
14298 rtx src, dst, srcbase, dstbase;
14299 rtx part_bytes_reg = NULL;
14300 rtx mem;
14301
14302 if (!CONST_INT_P (operands[2])
14303 || !CONST_INT_P (operands[3])
14304 || INTVAL (operands[2]) > 64)
14305 return 0;
14306
14307 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14308 return arm_movmemqi_unaligned (operands);
14309
14310 if (INTVAL (operands[3]) & 3)
14311 return 0;
14312
14313 dstbase = operands[0];
14314 srcbase = operands[1];
14315
14316 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14317 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14318
14319 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14320 out_words_to_go = INTVAL (operands[2]) / 4;
14321 last_bytes = INTVAL (operands[2]) & 3;
14322 dstoffset = srcoffset = 0;
14323
14324 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14325 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14326
14327 while (in_words_to_go >= 2)
14328 {
14329 if (in_words_to_go > 4)
14330 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14331 TRUE, srcbase, &srcoffset));
14332 else
14333 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14334 src, FALSE, srcbase,
14335 &srcoffset));
14336
14337 if (out_words_to_go)
14338 {
14339 if (out_words_to_go > 4)
14340 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14341 TRUE, dstbase, &dstoffset));
14342 else if (out_words_to_go != 1)
14343 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14344 out_words_to_go, dst,
14345 (last_bytes == 0
14346 ? FALSE : TRUE),
14347 dstbase, &dstoffset));
14348 else
14349 {
14350 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14351 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14352 if (last_bytes != 0)
14353 {
14354 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14355 dstoffset += 4;
14356 }
14357 }
14358 }
14359
14360 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14361 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14362 }
14363
14364 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14365 if (out_words_to_go)
14366 {
14367 rtx sreg;
14368
14369 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14370 sreg = copy_to_reg (mem);
14371
14372 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14373 emit_move_insn (mem, sreg);
14374 in_words_to_go--;
14375
14376 gcc_assert (!in_words_to_go); /* Sanity check */
14377 }
14378
14379 if (in_words_to_go)
14380 {
14381 gcc_assert (in_words_to_go > 0);
14382
14383 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14384 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14385 }
14386
14387 gcc_assert (!last_bytes || part_bytes_reg);
14388
14389 if (BYTES_BIG_ENDIAN && last_bytes)
14390 {
14391 rtx tmp = gen_reg_rtx (SImode);
14392
14393 /* The bytes we want are in the top end of the word. */
14394 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14395 GEN_INT (8 * (4 - last_bytes))));
14396 part_bytes_reg = tmp;
14397
14398 while (last_bytes)
14399 {
14400 mem = adjust_automodify_address (dstbase, QImode,
14401 plus_constant (Pmode, dst,
14402 last_bytes - 1),
14403 dstoffset + last_bytes - 1);
14404 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14405
14406 if (--last_bytes)
14407 {
14408 tmp = gen_reg_rtx (SImode);
14409 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14410 part_bytes_reg = tmp;
14411 }
14412 }
14413
14414 }
14415 else
14416 {
14417 if (last_bytes > 1)
14418 {
14419 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14420 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14421 last_bytes -= 2;
14422 if (last_bytes)
14423 {
14424 rtx tmp = gen_reg_rtx (SImode);
14425 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14426 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14427 part_bytes_reg = tmp;
14428 dstoffset += 2;
14429 }
14430 }
14431
14432 if (last_bytes)
14433 {
14434 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14435 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14436 }
14437 }
14438
14439 return 1;
14440 }
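/* A rough trace for a word-aligned 11-byte copy: IN_WORDS_TO_GO == 3,
   OUT_WORDS_TO_GO == 2 and LAST_BYTES == 3, so a single LDM loads three
   words, an STM with write-back stores the first two, the register holding
   the third word serves as PART_BYTES_REG, and (on a little-endian target)
   the tail is finished with one halfword store plus one byte store.  */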
14441
14442 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14443 by mode size. */
14444 inline static rtx
14445 next_consecutive_mem (rtx mem)
14446 {
14447 machine_mode mode = GET_MODE (mem);
14448 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14449 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14450
14451 return adjust_automodify_address (mem, mode, addr, offset);
14452 }
14453
14454 /* Copy using LDRD/STRD instructions whenever possible.
14455 Returns true upon success. */
14456 bool
14457 gen_movmem_ldrd_strd (rtx *operands)
14458 {
14459 unsigned HOST_WIDE_INT len;
14460 HOST_WIDE_INT align;
14461 rtx src, dst, base;
14462 rtx reg0;
14463 bool src_aligned, dst_aligned;
14464 bool src_volatile, dst_volatile;
14465
14466 gcc_assert (CONST_INT_P (operands[2]));
14467 gcc_assert (CONST_INT_P (operands[3]));
14468
14469 len = UINTVAL (operands[2]);
14470 if (len > 64)
14471 return false;
14472
14473 /* Maximum alignment we can assume for both src and dst buffers. */
14474 align = INTVAL (operands[3]);
14475
14476 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14477 return false;
14478
14479 /* Place src and dst addresses in registers
14480 and update the corresponding mem rtx. */
14481 dst = operands[0];
14482 dst_volatile = MEM_VOLATILE_P (dst);
14483 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14484 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14485 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14486
14487 src = operands[1];
14488 src_volatile = MEM_VOLATILE_P (src);
14489 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14490 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14491 src = adjust_automodify_address (src, VOIDmode, base, 0);
14492
14493 if (!unaligned_access && !(src_aligned && dst_aligned))
14494 return false;
14495
14496 if (src_volatile || dst_volatile)
14497 return false;
14498
14499 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14500 if (!(dst_aligned || src_aligned))
14501 return arm_gen_movmemqi (operands);
14502
14503 /* If either src or dst is unaligned we'll be accessing it as pairs
14504 of unaligned SImode accesses. Otherwise we can generate DImode
14505 ldrd/strd instructions. */
14506 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14507 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14508
14509 while (len >= 8)
14510 {
14511 len -= 8;
14512 reg0 = gen_reg_rtx (DImode);
14513 rtx low_reg = NULL_RTX;
14514 rtx hi_reg = NULL_RTX;
14515
14516 if (!src_aligned || !dst_aligned)
14517 {
14518 low_reg = gen_lowpart (SImode, reg0);
14519 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14520 }
14521 if (src_aligned)
14522 emit_move_insn (reg0, src);
14523 else
14524 {
14525 emit_insn (gen_unaligned_loadsi (low_reg, src));
14526 src = next_consecutive_mem (src);
14527 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14528 }
14529
14530 if (dst_aligned)
14531 emit_move_insn (dst, reg0);
14532 else
14533 {
14534 emit_insn (gen_unaligned_storesi (dst, low_reg));
14535 dst = next_consecutive_mem (dst);
14536 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14537 }
14538
14539 src = next_consecutive_mem (src);
14540 dst = next_consecutive_mem (dst);
14541 }
14542
14543 gcc_assert (len < 8);
14544 if (len >= 4)
14545 {
14546 /* More than a word but less than a double-word to copy. Copy a word. */
14547 reg0 = gen_reg_rtx (SImode);
14548 src = adjust_address (src, SImode, 0);
14549 dst = adjust_address (dst, SImode, 0);
14550 if (src_aligned)
14551 emit_move_insn (reg0, src);
14552 else
14553 emit_insn (gen_unaligned_loadsi (reg0, src));
14554
14555 if (dst_aligned)
14556 emit_move_insn (dst, reg0);
14557 else
14558 emit_insn (gen_unaligned_storesi (dst, reg0));
14559
14560 src = next_consecutive_mem (src);
14561 dst = next_consecutive_mem (dst);
14562 len -= 4;
14563 }
14564
14565 if (len == 0)
14566 return true;
14567
14568 /* Copy the remaining bytes. */
14569 if (len >= 2)
14570 {
14571 dst = adjust_address (dst, HImode, 0);
14572 src = adjust_address (src, HImode, 0);
14573 reg0 = gen_reg_rtx (SImode);
14574 if (src_aligned)
14575 emit_insn (gen_zero_extendhisi2 (reg0, src));
14576 else
14577 emit_insn (gen_unaligned_loadhiu (reg0, src));
14578
14579 if (dst_aligned)
14580 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14581 else
14582 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14583
14584 src = next_consecutive_mem (src);
14585 dst = next_consecutive_mem (dst);
14586 if (len == 2)
14587 return true;
14588 }
14589
14590 dst = adjust_address (dst, QImode, 0);
14591 src = adjust_address (src, QImode, 0);
14592 reg0 = gen_reg_rtx (QImode);
14593 emit_move_insn (reg0, src);
14594 emit_move_insn (dst, reg0);
14595 return true;
14596 }
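/* A rough trace for LEN == 14 with both operands word-aligned and neither
   volatile: one DImode move (normally an LDRD/STRD pair) copies 8 bytes, the
   word path copies 4 more, and the halfword path copies the final 2 and
   returns, so the trailing byte copy is never reached.  */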
14597
14598 /* Select a dominance comparison mode if possible for a test of the general
14599 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14600 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14601 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14602 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14603 In all cases OP will be either EQ or NE, but we don't need to know which
14604 here. If we are unable to support a dominance comparison we return
14605 CC mode. This will then fail to match for the RTL expressions that
14606 generate this call. */
14607 machine_mode
14608 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14609 {
14610 enum rtx_code cond1, cond2;
14611 int swapped = 0;
14612
14613 /* Currently we will probably get the wrong result if the individual
14614 comparisons are not simple. This also ensures that it is safe to
14615 reverse a comparison if necessary. */
14616 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14617 != CCmode)
14618 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14619 != CCmode))
14620 return CCmode;
14621
14622 /* The if_then_else variant of this tests the second condition if the
14623 first passes, but is true if the first fails. Reverse the first
14624 condition to get a true "inclusive-or" expression. */
14625 if (cond_or == DOM_CC_NX_OR_Y)
14626 cond1 = reverse_condition (cond1);
14627
14628 /* If the comparisons are not equal, and one doesn't dominate the other,
14629 then we can't do this. */
14630 if (cond1 != cond2
14631 && !comparison_dominates_p (cond1, cond2)
14632 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14633 return CCmode;
14634
14635 if (swapped)
14636 std::swap (cond1, cond2);
14637
14638 switch (cond1)
14639 {
14640 case EQ:
14641 if (cond_or == DOM_CC_X_AND_Y)
14642 return CC_DEQmode;
14643
14644 switch (cond2)
14645 {
14646 case EQ: return CC_DEQmode;
14647 case LE: return CC_DLEmode;
14648 case LEU: return CC_DLEUmode;
14649 case GE: return CC_DGEmode;
14650 case GEU: return CC_DGEUmode;
14651 default: gcc_unreachable ();
14652 }
14653
14654 case LT:
14655 if (cond_or == DOM_CC_X_AND_Y)
14656 return CC_DLTmode;
14657
14658 switch (cond2)
14659 {
14660 case LT:
14661 return CC_DLTmode;
14662 case LE:
14663 return CC_DLEmode;
14664 case NE:
14665 return CC_DNEmode;
14666 default:
14667 gcc_unreachable ();
14668 }
14669
14670 case GT:
14671 if (cond_or == DOM_CC_X_AND_Y)
14672 return CC_DGTmode;
14673
14674 switch (cond2)
14675 {
14676 case GT:
14677 return CC_DGTmode;
14678 case GE:
14679 return CC_DGEmode;
14680 case NE:
14681 return CC_DNEmode;
14682 default:
14683 gcc_unreachable ();
14684 }
14685
14686 case LTU:
14687 if (cond_or == DOM_CC_X_AND_Y)
14688 return CC_DLTUmode;
14689
14690 switch (cond2)
14691 {
14692 case LTU:
14693 return CC_DLTUmode;
14694 case LEU:
14695 return CC_DLEUmode;
14696 case NE:
14697 return CC_DNEmode;
14698 default:
14699 gcc_unreachable ();
14700 }
14701
14702 case GTU:
14703 if (cond_or == DOM_CC_X_AND_Y)
14704 return CC_DGTUmode;
14705
14706 switch (cond2)
14707 {
14708 case GTU:
14709 return CC_DGTUmode;
14710 case GEU:
14711 return CC_DGEUmode;
14712 case NE:
14713 return CC_DNEmode;
14714 default:
14715 gcc_unreachable ();
14716 }
14717
14718 /* The remaining cases only occur when both comparisons are the
14719 same. */
14720 case NE:
14721 gcc_assert (cond1 == cond2);
14722 return CC_DNEmode;
14723
14724 case LE:
14725 gcc_assert (cond1 == cond2);
14726 return CC_DLEmode;
14727
14728 case GE:
14729 gcc_assert (cond1 == cond2);
14730 return CC_DGEmode;
14731
14732 case LEU:
14733 gcc_assert (cond1 == cond2);
14734 return CC_DLEUmode;
14735
14736 case GEU:
14737 gcc_assert (cond1 == cond2);
14738 return CC_DGEUmode;
14739
14740 default:
14741 gcc_unreachable ();
14742 }
14743 }
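/* For example, assuming both operands are simple register comparisons (so
   arm_select_cc_mode yields CCmode for each), (EQ a b) || (LE c d) with
   COND_OR == DOM_CC_X_OR_Y returns CC_DLEmode, since EQ dominates LE
   (equality implies less-than-or-equal).  If neither condition dominated
   the other we would return CCmode and the combined pattern would simply
   fail to match.  */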
14744
14745 machine_mode
14746 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14747 {
14748 /* All floating point compares return CCFP if it is an equality
14749 comparison, and CCFPE otherwise. */
14750 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14751 {
14752 switch (op)
14753 {
14754 case EQ:
14755 case NE:
14756 case UNORDERED:
14757 case ORDERED:
14758 case UNLT:
14759 case UNLE:
14760 case UNGT:
14761 case UNGE:
14762 case UNEQ:
14763 case LTGT:
14764 return CCFPmode;
14765
14766 case LT:
14767 case LE:
14768 case GT:
14769 case GE:
14770 return CCFPEmode;
14771
14772 default:
14773 gcc_unreachable ();
14774 }
14775 }
14776
14777 /* A compare with a shifted operand. Because of canonicalization, the
14778 comparison will have to be swapped when we emit the assembler. */
14779 if (GET_MODE (y) == SImode
14780 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14781 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14782 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14783 || GET_CODE (x) == ROTATERT))
14784 return CC_SWPmode;
14785
14786 /* This operation is performed swapped, but since we only rely on the Z
14787 flag we don't need an additional mode. */
14788 if (GET_MODE (y) == SImode
14789 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14790 && GET_CODE (x) == NEG
14791 && (op == EQ || op == NE))
14792 return CC_Zmode;
14793
14794 /* This is a special case that is used by combine to allow a
14795 comparison of a shifted byte load to be split into a zero-extend
14796 followed by a comparison of the shifted integer (only valid for
14797 equalities and unsigned inequalities). */
14798 if (GET_MODE (x) == SImode
14799 && GET_CODE (x) == ASHIFT
14800 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14801 && GET_CODE (XEXP (x, 0)) == SUBREG
14802 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14803 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14804 && (op == EQ || op == NE
14805 || op == GEU || op == GTU || op == LTU || op == LEU)
14806 && CONST_INT_P (y))
14807 return CC_Zmode;
14808
14809 /* A construct for a conditional compare, if the false arm contains
14810 0, then both conditions must be true, otherwise either condition
14811 must be true. Not all conditions are possible, so CCmode is
14812 returned if it can't be done. */
14813 if (GET_CODE (x) == IF_THEN_ELSE
14814 && (XEXP (x, 2) == const0_rtx
14815 || XEXP (x, 2) == const1_rtx)
14816 && COMPARISON_P (XEXP (x, 0))
14817 && COMPARISON_P (XEXP (x, 1)))
14818 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14819 INTVAL (XEXP (x, 2)));
14820
14821 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14822 if (GET_CODE (x) == AND
14823 && (op == EQ || op == NE)
14824 && COMPARISON_P (XEXP (x, 0))
14825 && COMPARISON_P (XEXP (x, 1)))
14826 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14827 DOM_CC_X_AND_Y);
14828
14829 if (GET_CODE (x) == IOR
14830 && (op == EQ || op == NE)
14831 && COMPARISON_P (XEXP (x, 0))
14832 && COMPARISON_P (XEXP (x, 1)))
14833 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14834 DOM_CC_X_OR_Y);
14835
14836 /* An operation (on Thumb) where we want to test for a single bit.
14837 This is done by shifting that bit up into the top bit of a
14838 scratch register; we can then branch on the sign bit. */
14839 if (TARGET_THUMB1
14840 && GET_MODE (x) == SImode
14841 && (op == EQ || op == NE)
14842 && GET_CODE (x) == ZERO_EXTRACT
14843 && XEXP (x, 1) == const1_rtx)
14844 return CC_Nmode;
14845
14846 /* An operation that sets the condition codes as a side-effect, the
14847 V flag is not set correctly, so we can only use comparisons where
14848 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14849 instead.) */
14850 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14851 if (GET_MODE (x) == SImode
14852 && y == const0_rtx
14853 && (op == EQ || op == NE || op == LT || op == GE)
14854 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14855 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14856 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14857 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14858 || GET_CODE (x) == LSHIFTRT
14859 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14860 || GET_CODE (x) == ROTATERT
14861 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14862 return CC_NOOVmode;
14863
14864 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14865 return CC_Zmode;
14866
14867 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14868 && GET_CODE (x) == PLUS
14869 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14870 return CC_Cmode;
14871
14872 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14873 {
14874 switch (op)
14875 {
14876 case EQ:
14877 case NE:
14878 /* A DImode comparison against zero can be implemented by
14879 or'ing the two halves together. */
14880 if (y == const0_rtx)
14881 return CC_Zmode;
14882
14883 /* We can do an equality test in three Thumb instructions. */
14884 if (!TARGET_32BIT)
14885 return CC_Zmode;
14886
14887 /* FALLTHROUGH */
14888
14889 case LTU:
14890 case LEU:
14891 case GTU:
14892 case GEU:
14893 /* DImode unsigned comparisons can be implemented by cmp +
14894 cmpeq without a scratch register. Not worth doing in
14895 Thumb-2. */
14896 if (TARGET_32BIT)
14897 return CC_CZmode;
14898
14899 /* FALLTHROUGH */
14900
14901 case LT:
14902 case LE:
14903 case GT:
14904 case GE:
14905 /* DImode signed and unsigned comparisons can be implemented
14906 by cmp + sbcs with a scratch register, but that does not
14907 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14908 gcc_assert (op != EQ && op != NE);
14909 return CC_NCVmode;
14910
14911 default:
14912 gcc_unreachable ();
14913 }
14914 }
14915
14916 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14917 return GET_MODE (x);
14918
14919 return CCmode;
14920 }
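/* As a concrete case, a comparison such as (LTU (plus a b) a) -- the usual
   shape of an unsigned add-overflow test -- selects CC_Cmode above, so that
   only the carry flag is relied upon.  */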
14921
14922 /* X and Y are two things to compare using CODE. Emit the compare insn and
14923 return the rtx for register 0 in the proper mode. FP means this is a
14924 floating point compare: I don't think that it is needed on the arm. */
14925 rtx
14926 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14927 {
14928 machine_mode mode;
14929 rtx cc_reg;
14930 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14931
14932 /* We might have X as a constant, Y as a register because of the predicates
14933 used for cmpdi. If so, force X to a register here. */
14934 if (dimode_comparison && !REG_P (x))
14935 x = force_reg (DImode, x);
14936
14937 mode = SELECT_CC_MODE (code, x, y);
14938 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14939
14940 if (dimode_comparison
14941 && mode != CC_CZmode)
14942 {
14943 rtx clobber, set;
14944
14945 /* To compare two non-zero values for equality, XOR them and
14946 then compare against zero. Not used for ARM mode; there
14947 CC_CZmode is cheaper. */
14948 if (mode == CC_Zmode && y != const0_rtx)
14949 {
14950 gcc_assert (!reload_completed);
14951 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14952 y = const0_rtx;
14953 }
14954
14955 /* A scratch register is required. */
14956 if (reload_completed)
14957 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14958 else
14959 scratch = gen_rtx_SCRATCH (SImode);
14960
14961 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14962 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14963 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14964 }
14965 else
14966 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14967
14968 return cc_reg;
14969 }
14970
14971 /* Generate a sequence of insns that will generate the correct return
14972 address mask depending on the physical architecture that the program
14973 is running on. */
14974 rtx
14975 arm_gen_return_addr_mask (void)
14976 {
14977 rtx reg = gen_reg_rtx (Pmode);
14978
14979 emit_insn (gen_return_addr_mask (reg));
14980 return reg;
14981 }
14982
14983 void
14984 arm_reload_in_hi (rtx *operands)
14985 {
14986 rtx ref = operands[1];
14987 rtx base, scratch;
14988 HOST_WIDE_INT offset = 0;
14989
14990 if (GET_CODE (ref) == SUBREG)
14991 {
14992 offset = SUBREG_BYTE (ref);
14993 ref = SUBREG_REG (ref);
14994 }
14995
14996 if (REG_P (ref))
14997 {
14998 /* We have a pseudo which has been spilt onto the stack; there
14999 are two cases here: the first where there is a simple
15000 stack-slot replacement and a second where the stack-slot is
15001 out of range, or is used as a subreg. */
15002 if (reg_equiv_mem (REGNO (ref)))
15003 {
15004 ref = reg_equiv_mem (REGNO (ref));
15005 base = find_replacement (&XEXP (ref, 0));
15006 }
15007 else
15008 /* The slot is out of range, or was dressed up in a SUBREG. */
15009 base = reg_equiv_address (REGNO (ref));
15010
15011 /* PR 62554: If there is no equivalent memory location then just move
15012 the value as an SImode register move. This happens when the target
15013 architecture variant does not have an HImode register move. */
15014 if (base == NULL)
15015 {
15016 gcc_assert (REG_P (operands[0]));
15017 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15018 gen_rtx_SUBREG (SImode, ref, 0)));
15019 return;
15020 }
15021 }
15022 else
15023 base = find_replacement (&XEXP (ref, 0));
15024
15025 /* Handle the case where the address is too complex to be offset by 1. */
15026 if (GET_CODE (base) == MINUS
15027 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15028 {
15029 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15030
15031 emit_set_insn (base_plus, base);
15032 base = base_plus;
15033 }
15034 else if (GET_CODE (base) == PLUS)
15035 {
15036 /* The addend must be CONST_INT, or we would have dealt with it above. */
15037 HOST_WIDE_INT hi, lo;
15038
15039 offset += INTVAL (XEXP (base, 1));
15040 base = XEXP (base, 0);
15041
15042 /* Rework the address into a legal sequence of insns. */
15043 /* Valid range for lo is -4095 -> 4095 */
15044 lo = (offset >= 0
15045 ? (offset & 0xfff)
15046 : -((-offset) & 0xfff));
15047
15048 /* Corner case, if lo is the max offset then we would be out of range
15049 once we have added the additional 1 below, so bump the msb into the
15050 pre-loading insn(s). */
15051 if (lo == 4095)
15052 lo &= 0x7ff;
15053
15054 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15055 ^ (HOST_WIDE_INT) 0x80000000)
15056 - (HOST_WIDE_INT) 0x80000000);
15057
15058 gcc_assert (hi + lo == offset);
15059
15060 if (hi != 0)
15061 {
15062 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15063
15064 /* Get the base address; addsi3 knows how to handle constants
15065 that require more than one insn. */
15066 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15067 base = base_plus;
15068 offset = lo;
15069 }
15070 }
15071
15072 /* Operands[2] may overlap operands[0] (though it won't overlap
15073 operands[1]), that's why we asked for a DImode reg -- so we can
15074 use the bit that does not overlap. */
15075 if (REGNO (operands[2]) == REGNO (operands[0]))
15076 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15077 else
15078 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15079
15080 emit_insn (gen_zero_extendqisi2 (scratch,
15081 gen_rtx_MEM (QImode,
15082 plus_constant (Pmode, base,
15083 offset))));
15084 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15085 gen_rtx_MEM (QImode,
15086 plus_constant (Pmode, base,
15087 offset + 1))));
15088 if (!BYTES_BIG_ENDIAN)
15089 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15090 gen_rtx_IOR (SImode,
15091 gen_rtx_ASHIFT
15092 (SImode,
15093 gen_rtx_SUBREG (SImode, operands[0], 0),
15094 GEN_INT (8)),
15095 scratch));
15096 else
15097 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15098 gen_rtx_IOR (SImode,
15099 gen_rtx_ASHIFT (SImode, scratch,
15100 GEN_INT (8)),
15101 gen_rtx_SUBREG (SImode, operands[0], 0)));
15102 }
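/* On a little-endian target the sequence emitted above is roughly
   equivalent to (register names purely illustrative):

	ldrb	r_scratch, [r_base, #off]
	ldrb	r_dest, [r_base, #off + 1]
	orr	r_dest, r_scratch, r_dest, lsl #8

   i.e. two zero-extending byte loads recombined into the halfword.  */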
15103
15104 /* Handle storing a half-word to memory during reload by synthesizing it as two
15105 byte stores. Take care not to clobber the input values until after we
15106 have moved them somewhere safe. This code assumes that if the DImode
15107 scratch in operands[2] overlaps either the input value or output address
15108 in some way, then that value must die in this insn (we absolutely need
15109 two scratch registers for some corner cases). */
15110 void
15111 arm_reload_out_hi (rtx *operands)
15112 {
15113 rtx ref = operands[0];
15114 rtx outval = operands[1];
15115 rtx base, scratch;
15116 HOST_WIDE_INT offset = 0;
15117
15118 if (GET_CODE (ref) == SUBREG)
15119 {
15120 offset = SUBREG_BYTE (ref);
15121 ref = SUBREG_REG (ref);
15122 }
15123
15124 if (REG_P (ref))
15125 {
15126 /* We have a pseudo which has been spilt onto the stack; there
15127 are two cases here: the first where there is a simple
15128 stack-slot replacement and a second where the stack-slot is
15129 out of range, or is used as a subreg. */
15130 if (reg_equiv_mem (REGNO (ref)))
15131 {
15132 ref = reg_equiv_mem (REGNO (ref));
15133 base = find_replacement (&XEXP (ref, 0));
15134 }
15135 else
15136 /* The slot is out of range, or was dressed up in a SUBREG. */
15137 base = reg_equiv_address (REGNO (ref));
15138
15139 /* PR 62254: If there is no equivalent memory location then just move
15140 the value as an SImode register move. This happens when the target
15141 architecture variant does not have an HImode register move. */
15142 if (base == NULL)
15143 {
15144 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15145
15146 if (REG_P (outval))
15147 {
15148 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15149 gen_rtx_SUBREG (SImode, outval, 0)));
15150 }
15151 else /* SUBREG_P (outval) */
15152 {
15153 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15154 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15155 SUBREG_REG (outval)));
15156 else
15157 /* FIXME: Handle other cases ? */
15158 gcc_unreachable ();
15159 }
15160 return;
15161 }
15162 }
15163 else
15164 base = find_replacement (&XEXP (ref, 0));
15165
15166 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15167
15168 /* Handle the case where the address is too complex to be offset by 1. */
15169 if (GET_CODE (base) == MINUS
15170 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15171 {
15172 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15173
15174 /* Be careful not to destroy OUTVAL. */
15175 if (reg_overlap_mentioned_p (base_plus, outval))
15176 {
15177 /* Updating base_plus might destroy outval, see if we can
15178 swap the scratch and base_plus. */
15179 if (!reg_overlap_mentioned_p (scratch, outval))
15180 std::swap (scratch, base_plus);
15181 else
15182 {
15183 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15184
15185 /* Be conservative and copy OUTVAL into the scratch now,
15186 this should only be necessary if outval is a subreg
15187 of something larger than a word. */
15188 /* XXX Might this clobber base? I can't see how it can,
15189 since scratch is known to overlap with OUTVAL, and
15190 must be wider than a word. */
15191 emit_insn (gen_movhi (scratch_hi, outval));
15192 outval = scratch_hi;
15193 }
15194 }
15195
15196 emit_set_insn (base_plus, base);
15197 base = base_plus;
15198 }
15199 else if (GET_CODE (base) == PLUS)
15200 {
15201 /* The addend must be CONST_INT, or we would have dealt with it above. */
15202 HOST_WIDE_INT hi, lo;
15203
15204 offset += INTVAL (XEXP (base, 1));
15205 base = XEXP (base, 0);
15206
15207 /* Rework the address into a legal sequence of insns. */
15208 /* Valid range for lo is -4095 -> 4095 */
15209 lo = (offset >= 0
15210 ? (offset & 0xfff)
15211 : -((-offset) & 0xfff));
15212
15213 /* Corner case, if lo is the max offset then we would be out of range
15214 once we have added the additional 1 below, so bump the msb into the
15215 pre-loading insn(s). */
15216 if (lo == 4095)
15217 lo &= 0x7ff;
15218
15219 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15220 ^ (HOST_WIDE_INT) 0x80000000)
15221 - (HOST_WIDE_INT) 0x80000000);
15222
15223 gcc_assert (hi + lo == offset);
15224
15225 if (hi != 0)
15226 {
15227 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15228
15229 /* Be careful not to destroy OUTVAL. */
15230 if (reg_overlap_mentioned_p (base_plus, outval))
15231 {
15232 /* Updating base_plus might destroy outval, see if we
15233 can swap the scratch and base_plus. */
15234 if (!reg_overlap_mentioned_p (scratch, outval))
15235 std::swap (scratch, base_plus);
15236 else
15237 {
15238 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15239
15240 /* Be conservative and copy outval into scratch now,
15241 this should only be necessary if outval is a
15242 subreg of something larger than a word. */
15243 /* XXX Might this clobber base? I can't see how it
15244 can, since scratch is known to overlap with
15245 outval. */
15246 emit_insn (gen_movhi (scratch_hi, outval));
15247 outval = scratch_hi;
15248 }
15249 }
15250
15251 /* Get the base address; addsi3 knows how to handle constants
15252 that require more than one insn. */
15253 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15254 base = base_plus;
15255 offset = lo;
15256 }
15257 }
15258
15259 if (BYTES_BIG_ENDIAN)
15260 {
15261 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15262 plus_constant (Pmode, base,
15263 offset + 1)),
15264 gen_lowpart (QImode, outval)));
15265 emit_insn (gen_lshrsi3 (scratch,
15266 gen_rtx_SUBREG (SImode, outval, 0),
15267 GEN_INT (8)));
15268 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15269 offset)),
15270 gen_lowpart (QImode, scratch)));
15271 }
15272 else
15273 {
15274 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15275 offset)),
15276 gen_lowpart (QImode, outval)));
15277 emit_insn (gen_lshrsi3 (scratch,
15278 gen_rtx_SUBREG (SImode, outval, 0),
15279 GEN_INT (8)));
15280 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15281 plus_constant (Pmode, base,
15282 offset + 1)),
15283 gen_lowpart (QImode, scratch)));
15284 }
15285 }
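/* The little-endian arm of the code above is roughly equivalent to
   (register names illustrative):

	strb	r_val, [r_base, #off]
	lsr	r_scratch, r_val, #8
	strb	r_scratch, [r_base, #off + 1]

   with the big-endian arm simply swapping which byte goes where.  */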
15286
15287 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15288 (padded to the size of a word) should be passed in a register. */
15289
15290 static bool
15291 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15292 {
15293 if (TARGET_AAPCS_BASED)
15294 return must_pass_in_stack_var_size (mode, type);
15295 else
15296 return must_pass_in_stack_var_size_or_pad (mode, type);
15297 }
15298
15299
15300 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15301 byte of a stack argument has useful data. For legacy APCS ABIs we use
15302 the default. For AAPCS based ABIs small aggregate types are placed
15303 in the lowest memory address. */
15304
15305 static pad_direction
15306 arm_function_arg_padding (machine_mode mode, const_tree type)
15307 {
15308 if (!TARGET_AAPCS_BASED)
15309 return default_function_arg_padding (mode, type);
15310
15311 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15312 return PAD_DOWNWARD;
15313
15314 return PAD_UPWARD;
15315 }
15316
15317
15318 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15319 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15320 register has useful data, and return the opposite if the most
15321 significant byte does. */
15322
15323 bool
15324 arm_pad_reg_upward (machine_mode mode,
15325 tree type, int first ATTRIBUTE_UNUSED)
15326 {
15327 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15328 {
15329 /* For AAPCS, small aggregates, small fixed-point types,
15330 and small complex types are always padded upwards. */
15331 if (type)
15332 {
15333 if ((AGGREGATE_TYPE_P (type)
15334 || TREE_CODE (type) == COMPLEX_TYPE
15335 || FIXED_POINT_TYPE_P (type))
15336 && int_size_in_bytes (type) <= 4)
15337 return true;
15338 }
15339 else
15340 {
15341 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15342 && GET_MODE_SIZE (mode) <= 4)
15343 return true;
15344 }
15345 }
15346
15347 /* Otherwise, use default padding. */
15348 return !BYTES_BIG_ENDIAN;
15349 }
15350
15351 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15352 assuming that the address in the base register is word aligned. */
15353 bool
15354 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15355 {
15356 HOST_WIDE_INT max_offset;
15357
15358 /* Offset must be a multiple of 4 in Thumb mode. */
15359 if (TARGET_THUMB2 && ((offset & 3) != 0))
15360 return false;
15361
15362 if (TARGET_THUMB2)
15363 max_offset = 1020;
15364 else if (TARGET_ARM)
15365 max_offset = 255;
15366 else
15367 return false;
15368
15369 return ((offset <= max_offset) && (offset >= -max_offset));
15370 }
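/* For instance, an offset of 1004 is accepted in Thumb-2 state (a multiple
   of 4 within +/-1020) but rejected in ARM state, whose limit here is
   +/-255; conversely an offset of 6 is fine in ARM state but rejected in
   Thumb-2 state because it is not a multiple of 4.  */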
15371
15372 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15373 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15374 Assumes that the address in the base register RN is word aligned. Pattern
15375 guarantees that both memory accesses use the same base register,
15376 the offsets are constants within the range, and the gap between the offsets is 4.
15377 If reload is complete then check that registers are legal. WBACK indicates whether
15378 address is updated. LOAD indicates whether memory access is load or store. */
15379 bool
15380 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15381 bool wback, bool load)
15382 {
15383 unsigned int t, t2, n;
15384
15385 if (!reload_completed)
15386 return true;
15387
15388 if (!offset_ok_for_ldrd_strd (offset))
15389 return false;
15390
15391 t = REGNO (rt);
15392 t2 = REGNO (rt2);
15393 n = REGNO (rn);
15394
15395 if ((TARGET_THUMB2)
15396 && ((wback && (n == t || n == t2))
15397 || (t == SP_REGNUM)
15398 || (t == PC_REGNUM)
15399 || (t2 == SP_REGNUM)
15400 || (t2 == PC_REGNUM)
15401 || (!load && (n == PC_REGNUM))
15402 || (load && (t == t2))
15403 /* Triggers Cortex-M3 LDRD errata. */
15404 || (!wback && load && fix_cm3_ldrd && (n == t))))
15405 return false;
15406
15407 if ((TARGET_ARM)
15408 && ((wback && (n == t || n == t2))
15409 || (t2 == PC_REGNUM)
15410 || (t % 2 != 0) /* First destination register is not even. */
15411 || (t2 != t + 1)
15412 /* PC can be used as base register (for offset addressing only),
15413 but it is deprecated. */
15414 || (n == PC_REGNUM)))
15415 return false;
15416
15417 return true;
15418 }
15419
15420 /* Return true if a 64-bit access with alignment ALIGN and with a
15421 constant offset OFFSET from the base pointer is permitted on this
15422 architecture. */
15423 static bool
15424 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15425 {
15426 return (unaligned_access
15427 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15428 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15429 }
15430
15431 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15432 operand MEM's address contains an immediate offset from the base
15433 register and has no side effects, in which case it sets BASE,
15434 OFFSET and ALIGN accordingly. */
15435 static bool
15436 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15437 {
15438 rtx addr;
15439
15440 gcc_assert (base != NULL && offset != NULL);
15441
15442 /* TODO: Handle more general memory operand patterns, such as
15443 PRE_DEC and PRE_INC. */
15444
15445 if (side_effects_p (mem))
15446 return false;
15447
15448 /* Can't deal with subregs. */
15449 if (GET_CODE (mem) == SUBREG)
15450 return false;
15451
15452 gcc_assert (MEM_P (mem));
15453
15454 *offset = const0_rtx;
15455 *align = MEM_ALIGN (mem);
15456
15457 addr = XEXP (mem, 0);
15458
15459 /* If addr isn't valid for DImode, then we can't handle it. */
15460 if (!arm_legitimate_address_p (DImode, addr,
15461 reload_in_progress || reload_completed))
15462 return false;
15463
15464 if (REG_P (addr))
15465 {
15466 *base = addr;
15467 return true;
15468 }
15469 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15470 {
15471 *base = XEXP (addr, 0);
15472 *offset = XEXP (addr, 1);
15473 return (REG_P (*base) && CONST_INT_P (*offset));
15474 }
15475
15476 return false;
15477 }
15478
15479 /* Called from a peephole2 to replace two word-size accesses with a
15480 single LDRD/STRD instruction. Returns true iff we can generate a
15481 new instruction sequence. That is, both accesses use the same base
15482 register and the gap between constant offsets is 4. This function
15483 may reorder its operands to match ldrd/strd RTL templates.
15484 OPERANDS are the operands found by the peephole matcher;
15485 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15486 corresponding memory operands. LOAD indicates whether the access
15487 is load or store. CONST_STORE indicates a store of constant
15488 integer values held in OPERANDS[4,5] and assumes that the pattern
15489 is 4 insns long, for the purpose of checking dead registers.
15490 COMMUTE indicates that register operands may be reordered. */
15491 bool
15492 gen_operands_ldrd_strd (rtx *operands, bool load,
15493 bool const_store, bool commute)
15494 {
15495 int nops = 2;
15496 HOST_WIDE_INT offsets[2], offset, align[2];
15497 rtx base = NULL_RTX;
15498 rtx cur_base, cur_offset, tmp;
15499 int i, gap;
15500 HARD_REG_SET regset;
15501
15502 gcc_assert (!const_store || !load);
15503 /* Check that the memory references are immediate offsets from the
15504 same base register. Extract the base register, the destination
15505 registers, and the corresponding memory offsets. */
15506 for (i = 0; i < nops; i++)
15507 {
15508 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15509 &align[i]))
15510 return false;
15511
15512 if (i == 0)
15513 base = cur_base;
15514 else if (REGNO (base) != REGNO (cur_base))
15515 return false;
15516
15517 offsets[i] = INTVAL (cur_offset);
15518 if (GET_CODE (operands[i]) == SUBREG)
15519 {
15520 tmp = SUBREG_REG (operands[i]);
15521 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15522 operands[i] = tmp;
15523 }
15524 }
15525
15526 /* Make sure there is no dependency between the individual loads. */
15527 if (load && REGNO (operands[0]) == REGNO (base))
15528 return false; /* RAW */
15529
15530 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15531 return false; /* WAW */
15532
15533 /* If the same input register is used in both stores
15534 when storing different constants, try to find a free register.
15535 For example, the code
15536 mov r0, 0
15537 str r0, [r2]
15538 mov r0, 1
15539 str r0, [r2, #4]
15540 can be transformed into
15541 mov r1, 0
15542 mov r0, 1
15543 strd r1, r0, [r2]
15544 in Thumb mode assuming that r1 is free.
15545 For ARM mode do the same but only if the starting register
15546 can be made to be even. */
15547 if (const_store
15548 && REGNO (operands[0]) == REGNO (operands[1])
15549 && INTVAL (operands[4]) != INTVAL (operands[5]))
15550 {
15551 if (TARGET_THUMB2)
15552 {
15553 CLEAR_HARD_REG_SET (regset);
15554 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15555 if (tmp == NULL_RTX)
15556 return false;
15557
15558 /* Use the new register in the first load to ensure that
15559 if the original input register is not dead after peephole,
15560 then it will have the correct constant value. */
15561 operands[0] = tmp;
15562 }
15563 else if (TARGET_ARM)
15564 {
15565 int regno = REGNO (operands[0]);
15566 if (!peep2_reg_dead_p (4, operands[0]))
15567 {
15568 /* When the input register is even and is not dead after the
15569 pattern, it has to hold the second constant but we cannot
15570 form a legal STRD in ARM mode with this register as the second
15571 register. */
15572 if (regno % 2 == 0)
15573 return false;
15574
15575 /* Is regno-1 free? */
15576 SET_HARD_REG_SET (regset);
15577 CLEAR_HARD_REG_BIT(regset, regno - 1);
15578 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15579 if (tmp == NULL_RTX)
15580 return false;
15581
15582 operands[0] = tmp;
15583 }
15584 else
15585 {
15586 /* Find a DImode register. */
15587 CLEAR_HARD_REG_SET (regset);
15588 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15589 if (tmp != NULL_RTX)
15590 {
15591 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15592 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15593 }
15594 else
15595 {
15596 /* Can we use the input register to form a DI register? */
15597 SET_HARD_REG_SET (regset);
15598 CLEAR_HARD_REG_BIT(regset,
15599 regno % 2 == 0 ? regno + 1 : regno - 1);
15600 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15601 if (tmp == NULL_RTX)
15602 return false;
15603 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15604 }
15605 }
15606
15607 gcc_assert (operands[0] != NULL_RTX);
15608 gcc_assert (operands[1] != NULL_RTX);
15609 gcc_assert (REGNO (operands[0]) % 2 == 0);
15610 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15611 }
15612 }
15613
15614 /* Make sure the instructions are ordered with lower memory access first. */
15615 if (offsets[0] > offsets[1])
15616 {
15617 gap = offsets[0] - offsets[1];
15618 offset = offsets[1];
15619
15620 /* Swap the instructions such that lower memory is accessed first. */
15621 std::swap (operands[0], operands[1]);
15622 std::swap (operands[2], operands[3]);
15623 std::swap (align[0], align[1]);
15624 if (const_store)
15625 std::swap (operands[4], operands[5]);
15626 }
15627 else
15628 {
15629 gap = offsets[1] - offsets[0];
15630 offset = offsets[0];
15631 }
15632
15633 /* Make sure accesses are to consecutive memory locations. */
15634 if (gap != 4)
15635 return false;
15636
15637 if (!align_ok_ldrd_strd (align[0], offset))
15638 return false;
15639
15640 /* Make sure we generate legal instructions. */
15641 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15642 false, load))
15643 return true;
15644
15645 /* In Thumb state, where registers are almost unconstrained, there
15646 is little hope to fix it. */
15647 if (TARGET_THUMB2)
15648 return false;
15649
15650 if (load && commute)
15651 {
15652 /* Try reordering registers. */
15653 std::swap (operands[0], operands[1]);
15654 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15655 false, load))
15656 return true;
15657 }
15658
15659 if (const_store)
15660 {
15661 /* If input registers are dead after this pattern, they can be
15662 reordered or replaced by other registers that are free in the
15663 current pattern. */
15664 if (!peep2_reg_dead_p (4, operands[0])
15665 || !peep2_reg_dead_p (4, operands[1]))
15666 return false;
15667
15668 /* Try to reorder the input registers. */
15669 /* For example, the code
15670 mov r0, 0
15671 mov r1, 1
15672 str r1, [r2]
15673 str r0, [r2, #4]
15674 can be transformed into
15675 mov r1, 0
15676 mov r0, 1
15677 strd r0, [r2]
15678 */
15679 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15680 false, false))
15681 {
15682 std::swap (operands[0], operands[1]);
15683 return true;
15684 }
15685
15686 /* Try to find a free DI register. */
15687 CLEAR_HARD_REG_SET (regset);
15688 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15689 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15690 while (true)
15691 {
15692 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15693 if (tmp == NULL_RTX)
15694 return false;
15695
15696 /* DREG must be an even-numbered register in DImode.
15697 Split it into SI registers. */
15698 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15699 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15700 gcc_assert (operands[0] != NULL_RTX);
15701 gcc_assert (operands[1] != NULL_RTX);
15702 gcc_assert (REGNO (operands[0]) % 2 == 0);
15703 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15704
15705 return (operands_ok_ldrd_strd (operands[0], operands[1],
15706 base, offset,
15707 false, load));
15708 }
15709 }
15710
15711 return false;
15712 }
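/* For example, a peephole that matched stores to [rN, #4] and [rN, #0] in
   that order arrives here with OFFSETS == {4, 0}; the swap above reorders
   the operands so that the peephole can emit a single STRD covering
   [rN, #0]..[rN, #7], provided the registers then pass
   operands_ok_ldrd_strd.  */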
15713
15714
15715
15716 \f
15717 /* Print a symbolic form of X to the debug file, F. */
15718 static void
15719 arm_print_value (FILE *f, rtx x)
15720 {
15721 switch (GET_CODE (x))
15722 {
15723 case CONST_INT:
15724 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15725 return;
15726
15727 case CONST_DOUBLE:
15728 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15729 return;
15730
15731 case CONST_VECTOR:
15732 {
15733 int i;
15734
15735 fprintf (f, "<");
15736 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15737 {
15738 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15739 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15740 fputc (',', f);
15741 }
15742 fprintf (f, ">");
15743 }
15744 return;
15745
15746 case CONST_STRING:
15747 fprintf (f, "\"%s\"", XSTR (x, 0));
15748 return;
15749
15750 case SYMBOL_REF:
15751 fprintf (f, "`%s'", XSTR (x, 0));
15752 return;
15753
15754 case LABEL_REF:
15755 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15756 return;
15757
15758 case CONST:
15759 arm_print_value (f, XEXP (x, 0));
15760 return;
15761
15762 case PLUS:
15763 arm_print_value (f, XEXP (x, 0));
15764 fprintf (f, "+");
15765 arm_print_value (f, XEXP (x, 1));
15766 return;
15767
15768 case PC:
15769 fprintf (f, "pc");
15770 return;
15771
15772 default:
15773 fprintf (f, "????");
15774 return;
15775 }
15776 }
15777 \f
15778 /* Routines for manipulation of the constant pool. */
15779
15780 /* Arm instructions cannot load a large constant directly into a
15781 register; they have to come from a pc relative load. The constant
15782 must therefore be placed in the addressable range of the pc
15783 relative load. Depending on the precise pc relative load
15784 instruction the range is somewhere between 256 bytes and 4k. This
15785 means that we often have to dump a constant inside a function, and
15786 generate code to branch around it.
15787
15788 It is important to minimize this, since the branches will slow
15789 things down and make the code larger.
15790
15791 Normally we can hide the table after an existing unconditional
15792 branch so that there is no interruption of the flow, but in the
15793 worst case the code looks like this:
15794
15795 ldr rn, L1
15796 ...
15797 b L2
15798 align
15799 L1: .long value
15800 L2:
15801 ...
15802
15803 ldr rn, L3
15804 ...
15805 b L4
15806 align
15807 L3: .long value
15808 L4:
15809 ...
15810
15811 We fix this by performing a scan after scheduling, which notices
15812 which instructions need to have their operands fetched from the
15813 constant table and builds the table.
15814
15815 The algorithm starts by building a table of all the constants that
15816 need fixing up and all the natural barriers in the function (places
15817 where a constant table can be dropped without breaking the flow).
15818 For each fixup we note how far the pc-relative replacement will be
15819 able to reach and the offset of the instruction into the function.
15820
15821 Having built the table we then group the fixes together to form
15822 tables that are as large as possible (subject to addressing
15823 constraints) and emit each table of constants after the last
15824 barrier that is within range of all the instructions in the group.
15825 If a group does not contain a barrier, then we forcibly create one
15826 by inserting a jump instruction into the flow. Once the table has
15827 been inserted, the insns are then modified to reference the
15828 relevant entry in the pool.
15829
15830 Possible enhancements to the algorithm (not implemented) are:
15831
15832 1) For some processors and object formats, there may be benefit in
15833 aligning the pools to the start of cache lines; this alignment
15834 would need to be taken into account when calculating addressability
15835 of a pool. */
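/* As a rough illustration of the constraints involved: a pc-relative word
   load with a 4k range whose fixup is recorded at address 0x100 contributes
   a max_address of roughly 0x1100, and the pool entry it references must be
   emitted (after a barrier) before the addresses of the grouped instructions
   exceed their respective limits.  */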
15836
15837 /* These typedefs are located at the start of this file, so that
15838 they can be used in the prototypes there. This comment is to
15839 remind readers of that fact so that the following structures
15840 can be understood more easily.
15841
15842 typedef struct minipool_node Mnode;
15843 typedef struct minipool_fixup Mfix; */
15844
15845 struct minipool_node
15846 {
15847 /* Doubly linked chain of entries. */
15848 Mnode * next;
15849 Mnode * prev;
15850 /* The maximum offset into the code at which this entry can be placed. While
15851 pushing fixes for forward references, all entries are sorted in order
15852 of increasing max_address. */
15853 HOST_WIDE_INT max_address;
15854 /* Similarly for an entry inserted for a backwards ref. */
15855 HOST_WIDE_INT min_address;
15856 /* The number of fixes referencing this entry. This can become zero
15857 if we "unpush" an entry. In this case we ignore the entry when we
15858 come to emit the code. */
15859 int refcount;
15860 /* The offset from the start of the minipool. */
15861 HOST_WIDE_INT offset;
15862 /* The value in the table. */
15863 rtx value;
15864 /* The mode of value. */
15865 machine_mode mode;
15866 /* The size of the value. With iWMMXt enabled
15867 sizes > 4 also imply an alignment of 8-bytes. */
15868 int fix_size;
15869 };
15870
15871 struct minipool_fixup
15872 {
15873 Mfix * next;
15874 rtx_insn * insn;
15875 HOST_WIDE_INT address;
15876 rtx * loc;
15877 machine_mode mode;
15878 int fix_size;
15879 rtx value;
15880 Mnode * minipool;
15881 HOST_WIDE_INT forwards;
15882 HOST_WIDE_INT backwards;
15883 };
15884
15885 /* Fixes less than a word need padding out to a word boundary. */
15886 #define MINIPOOL_FIX_SIZE(mode) \
15887 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
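/* So a QImode or HImode fix still occupies 4 bytes in the pool, while DImode
   and DFmode fixes occupy their natural 8 bytes.  */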
15888
15889 static Mnode * minipool_vector_head;
15890 static Mnode * minipool_vector_tail;
15891 static rtx_code_label *minipool_vector_label;
15892 static int minipool_pad;
15893
15894 /* The linked list of all minipool fixes required for this function. */
15895 Mfix * minipool_fix_head;
15896 Mfix * minipool_fix_tail;
15897 /* The fix entry for the current minipool, once it has been placed. */
15898 Mfix * minipool_barrier;
15899
15900 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15901 #define JUMP_TABLES_IN_TEXT_SECTION 0
15902 #endif
15903
15904 static HOST_WIDE_INT
15905 get_jump_table_size (rtx_jump_table_data *insn)
15906 {
15907 /* ADDR_VECs only take room if read-only data goes into the text
15908 section. */
15909 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15910 {
15911 rtx body = PATTERN (insn);
15912 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15913 HOST_WIDE_INT size;
15914 HOST_WIDE_INT modesize;
15915
15916 modesize = GET_MODE_SIZE (GET_MODE (body));
15917 size = modesize * XVECLEN (body, elt);
15918 switch (modesize)
15919 {
15920 case 1:
15921 /* Round up size of TBB table to a halfword boundary. */
15922 size = (size + 1) & ~HOST_WIDE_INT_1;
15923 break;
15924 case 2:
15925 /* No padding necessary for TBH. */
15926 break;
15927 case 4:
15928 /* Add two bytes for alignment on Thumb. */
15929 if (TARGET_THUMB)
15930 size += 2;
15931 break;
15932 default:
15933 gcc_unreachable ();
15934 }
15935 return size;
15936 }
15937
15938 return 0;
15939 }
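/* For example, when jump tables live in the text section, a TBB table with
   five QImode entries gives SIZE == 5, rounded up to 6 so that the following
   instruction stays halfword aligned; a TBH table needs no such rounding.  */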
15940
15941 /* Return the maximum amount of padding that will be inserted before
15942 label LABEL. */
15943
15944 static HOST_WIDE_INT
15945 get_label_padding (rtx label)
15946 {
15947 HOST_WIDE_INT align, min_insn_size;
15948
15949 align = 1 << label_to_alignment (label);
15950 min_insn_size = TARGET_THUMB ? 2 : 4;
15951 return align > min_insn_size ? align - min_insn_size : 0;
15952 }
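/* E.g. a label aligned to an 8-byte boundary may be preceded by up to
   6 bytes of padding when compiling for Thumb (minimum insn size 2), or
   up to 4 bytes in ARM state.  */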
15953
15954 /* Move a minipool fix MP from its current location to before MAX_MP.
15955 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15956 constraints may need updating. */
15957 static Mnode *
15958 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15959 HOST_WIDE_INT max_address)
15960 {
15961 /* The code below assumes these are different. */
15962 gcc_assert (mp != max_mp);
15963
15964 if (max_mp == NULL)
15965 {
15966 if (max_address < mp->max_address)
15967 mp->max_address = max_address;
15968 }
15969 else
15970 {
15971 if (max_address > max_mp->max_address - mp->fix_size)
15972 mp->max_address = max_mp->max_address - mp->fix_size;
15973 else
15974 mp->max_address = max_address;
15975
15976 /* Unlink MP from its current position. Since max_mp is non-null,
15977 mp->prev must be non-null. */
15978 mp->prev->next = mp->next;
15979 if (mp->next != NULL)
15980 mp->next->prev = mp->prev;
15981 else
15982 minipool_vector_tail = mp->prev;
15983
15984 /* Re-insert it before MAX_MP. */
15985 mp->next = max_mp;
15986 mp->prev = max_mp->prev;
15987 max_mp->prev = mp;
15988
15989 if (mp->prev != NULL)
15990 mp->prev->next = mp;
15991 else
15992 minipool_vector_head = mp;
15993 }
15994
15995 /* Save the new entry. */
15996 max_mp = mp;
15997
15998 /* Scan over the preceding entries and adjust their addresses as
15999 required. */
16000 while (mp->prev != NULL
16001 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16002 {
16003 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16004 mp = mp->prev;
16005 }
16006
16007 return max_mp;
16008 }
16009
16010 /* Add a constant to the minipool for a forward reference. Returns the
16011 node added or NULL if the constant will not fit in this pool. */
16012 static Mnode *
16013 add_minipool_forward_ref (Mfix *fix)
16014 {
16015 /* If set, max_mp is the first pool_entry that has a lower
16016 constraint than the one we are trying to add. */
16017 Mnode * max_mp = NULL;
16018 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16019 Mnode * mp;
16020
16021 /* If the minipool starts before the end of FIX->INSN then this FIX
16022 can not be placed into the current pool. Furthermore, adding the
16023 new constant pool entry may cause the pool to start FIX_SIZE bytes
16024 earlier. */
16025 if (minipool_vector_head
16026 && (fix->address + get_attr_length (fix->insn)
16027 >= minipool_vector_head->max_address - fix->fix_size))
16028 return NULL;
16029
16030 /* Scan the pool to see if a constant with the same value has
16031 already been added. While we are doing this, also note the
16032 location where we must insert the constant if it doesn't already
16033 exist. */
16034 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16035 {
16036 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16037 && fix->mode == mp->mode
16038 && (!LABEL_P (fix->value)
16039 || (CODE_LABEL_NUMBER (fix->value)
16040 == CODE_LABEL_NUMBER (mp->value)))
16041 && rtx_equal_p (fix->value, mp->value))
16042 {
16043 /* More than one fix references this entry. */
16044 mp->refcount++;
16045 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16046 }
16047
16048 /* Note the insertion point if necessary. */
16049 if (max_mp == NULL
16050 && mp->max_address > max_address)
16051 max_mp = mp;
16052
16053 /* If we are inserting an 8-byte aligned quantity and
16054 we have not already found an insertion point, then
16055 make sure that all such 8-byte aligned quantities are
16056 placed at the start of the pool. */
16057 if (ARM_DOUBLEWORD_ALIGN
16058 && max_mp == NULL
16059 && fix->fix_size >= 8
16060 && mp->fix_size < 8)
16061 {
16062 max_mp = mp;
16063 max_address = mp->max_address;
16064 }
16065 }
16066
16067 /* The value is not currently in the minipool, so we need to create
16068 a new entry for it. If MAX_MP is NULL, the entry will be put on
16069 the end of the list since the placement is less constrained than
16070 any existing entry. Otherwise, we insert the new fix before
16071 MAX_MP and, if necessary, adjust the constraints on the other
16072 entries. */
16073 mp = XNEW (Mnode);
16074 mp->fix_size = fix->fix_size;
16075 mp->mode = fix->mode;
16076 mp->value = fix->value;
16077 mp->refcount = 1;
16078 /* Not yet required for a backwards ref. */
16079 mp->min_address = -65536;
16080
16081 if (max_mp == NULL)
16082 {
16083 mp->max_address = max_address;
16084 mp->next = NULL;
16085 mp->prev = minipool_vector_tail;
16086
16087 if (mp->prev == NULL)
16088 {
16089 minipool_vector_head = mp;
16090 minipool_vector_label = gen_label_rtx ();
16091 }
16092 else
16093 mp->prev->next = mp;
16094
16095 minipool_vector_tail = mp;
16096 }
16097 else
16098 {
16099 if (max_address > max_mp->max_address - mp->fix_size)
16100 mp->max_address = max_mp->max_address - mp->fix_size;
16101 else
16102 mp->max_address = max_address;
16103
16104 mp->next = max_mp;
16105 mp->prev = max_mp->prev;
16106 max_mp->prev = mp;
16107 if (mp->prev != NULL)
16108 mp->prev->next = mp;
16109 else
16110 minipool_vector_head = mp;
16111 }
16112
16113 /* Save the new entry. */
16114 max_mp = mp;
16115
16116 /* Scan over the preceding entries and adjust their addresses as
16117 required. */
16118 while (mp->prev != NULL
16119 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16120 {
16121 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16122 mp = mp->prev;
16123 }
16124
16125 return max_mp;
16126 }
16127
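/* Move a minipool fix MP from its current location to after MIN_MP, the
   backward-reference counterpart of move_minipool_fix_forward_ref above.
   If MIN_MP is NULL then MP doesn't need moving, but its minimum address
   constraint may still need updating.  The offsets of all pool entries
   are recomputed afterwards. */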
16128 static Mnode *
16129 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16130 HOST_WIDE_INT min_address)
16131 {
16132 HOST_WIDE_INT offset;
16133
16134 /* The code below assumes these are different. */
16135 gcc_assert (mp != min_mp);
16136
16137 if (min_mp == NULL)
16138 {
16139 if (min_address > mp->min_address)
16140 mp->min_address = min_address;
16141 }
16142 else
16143 {
16144 /* We will adjust this below if it is too loose. */
16145 mp->min_address = min_address;
16146
16147 /* Unlink MP from its current position. Since min_mp is non-null,
16148 mp->next must be non-null. */
16149 mp->next->prev = mp->prev;
16150 if (mp->prev != NULL)
16151 mp->prev->next = mp->next;
16152 else
16153 minipool_vector_head = mp->next;
16154
16155 /* Reinsert it after MIN_MP. */
16156 mp->prev = min_mp;
16157 mp->next = min_mp->next;
16158 min_mp->next = mp;
16159 if (mp->next != NULL)
16160 mp->next->prev = mp;
16161 else
16162 minipool_vector_tail = mp;
16163 }
16164
16165 min_mp = mp;
16166
16167 offset = 0;
16168 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16169 {
16170 mp->offset = offset;
16171 if (mp->refcount > 0)
16172 offset += mp->fix_size;
16173
16174 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16175 mp->next->min_address = mp->min_address + mp->fix_size;
16176 }
16177
16178 return min_mp;
16179 }
16180
16181 /* Add a constant to the minipool for a backward reference. Returns the
16182 node added or NULL if the constant will not fit in this pool.
16183
16184 Note that the insertion code for a backwards reference can be
16185 somewhat confusing because the calculated offsets for each fix do
16186 not take into account the size of the pool (which is still under
16187 construction). */
16188 static Mnode *
16189 add_minipool_backward_ref (Mfix *fix)
16190 {
16191 /* If set, min_mp is the last pool_entry that has a lower constraint
16192 than the one we are trying to add. */
16193 Mnode *min_mp = NULL;
16194 /* This can be negative, since it is only a constraint. */
16195 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16196 Mnode *mp;
16197
16198 /* If we can't reach the current pool from this insn, or if we can't
16199 insert this entry at the end of the pool without pushing other
16200 fixes out of range, then we don't try. This ensures that we
16201 can't fail later on. */
16202 if (min_address >= minipool_barrier->address
16203 || (minipool_vector_tail->min_address + fix->fix_size
16204 >= minipool_barrier->address))
16205 return NULL;
16206
16207 /* Scan the pool to see if a constant with the same value has
16208 already been added. While we are doing this, also note the
16209 location where we must insert the constant if it doesn't already
16210 exist. */
16211 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16212 {
16213 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16214 && fix->mode == mp->mode
16215 && (!LABEL_P (fix->value)
16216 || (CODE_LABEL_NUMBER (fix->value)
16217 == CODE_LABEL_NUMBER (mp->value)))
16218 && rtx_equal_p (fix->value, mp->value)
16219 /* Check that there is enough slack to move this entry to the
16220 end of the table (this is conservative). */
16221 && (mp->max_address
16222 > (minipool_barrier->address
16223 + minipool_vector_tail->offset
16224 + minipool_vector_tail->fix_size)))
16225 {
16226 mp->refcount++;
16227 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16228 }
16229
16230 if (min_mp != NULL)
16231 mp->min_address += fix->fix_size;
16232 else
16233 {
16234 /* Note the insertion point if necessary. */
16235 if (mp->min_address < min_address)
16236 {
16237 /* For now, we do not allow the insertion of 8-byte alignment
16238 requiring nodes anywhere but at the start of the pool. */
16239 if (ARM_DOUBLEWORD_ALIGN
16240 && fix->fix_size >= 8 && mp->fix_size < 8)
16241 return NULL;
16242 else
16243 min_mp = mp;
16244 }
16245 else if (mp->max_address
16246 < minipool_barrier->address + mp->offset + fix->fix_size)
16247 {
16248 /* Inserting before this entry would push the fix beyond
16249 its maximum address (which can happen if we have
16250 re-located a forwards fix); force the new fix to come
16251 after it. */
16252 if (ARM_DOUBLEWORD_ALIGN
16253 && fix->fix_size >= 8 && mp->fix_size < 8)
16254 return NULL;
16255 else
16256 {
16257 min_mp = mp;
16258 min_address = mp->min_address + fix->fix_size;
16259 }
16260 }
16261 /* Do not insert a non-8-byte aligned quantity before 8-byte
16262 aligned quantities. */
16263 else if (ARM_DOUBLEWORD_ALIGN
16264 && fix->fix_size < 8
16265 && mp->fix_size >= 8)
16266 {
16267 min_mp = mp;
16268 min_address = mp->min_address + fix->fix_size;
16269 }
16270 }
16271 }
16272
16273 /* We need to create a new entry. */
16274 mp = XNEW (Mnode);
16275 mp->fix_size = fix->fix_size;
16276 mp->mode = fix->mode;
16277 mp->value = fix->value;
16278 mp->refcount = 1;
16279 mp->max_address = minipool_barrier->address + 65536;
16280
16281 mp->min_address = min_address;
16282
16283 if (min_mp == NULL)
16284 {
16285 mp->prev = NULL;
16286 mp->next = minipool_vector_head;
16287
16288 if (mp->next == NULL)
16289 {
16290 minipool_vector_tail = mp;
16291 minipool_vector_label = gen_label_rtx ();
16292 }
16293 else
16294 mp->next->prev = mp;
16295
16296 minipool_vector_head = mp;
16297 }
16298 else
16299 {
16300 mp->next = min_mp->next;
16301 mp->prev = min_mp;
16302 min_mp->next = mp;
16303
16304 if (mp->next != NULL)
16305 mp->next->prev = mp;
16306 else
16307 minipool_vector_tail = mp;
16308 }
16309
16310 /* Save the new entry. */
16311 min_mp = mp;
16312
16313 if (mp->prev)
16314 mp = mp->prev;
16315 else
16316 mp->offset = 0;
16317
16318 /* Scan over the following entries and adjust their offsets. */
16319 while (mp->next != NULL)
16320 {
16321 if (mp->next->min_address < mp->min_address + mp->fix_size)
16322 mp->next->min_address = mp->min_address + mp->fix_size;
16323
16324 if (mp->refcount)
16325 mp->next->offset = mp->offset + mp->fix_size;
16326 else
16327 mp->next->offset = mp->offset;
16328
16329 mp = mp->next;
16330 }
16331
16332 return min_mp;
16333 }
16334
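/* Record BARRIER as the barrier for the current minipool and assign each
   pool entry that is still referenced its byte offset from the start of
   the pool. */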
16335 static void
16336 assign_minipool_offsets (Mfix *barrier)
16337 {
16338 HOST_WIDE_INT offset = 0;
16339 Mnode *mp;
16340
16341 minipool_barrier = barrier;
16342
16343 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16344 {
16345 mp->offset = offset;
16346
16347 if (mp->refcount > 0)
16348 offset += mp->fix_size;
16349 }
16350 }
16351
16352 /* Output the literal table. */
16353 static void
16354 dump_minipool (rtx_insn *scan)
16355 {
16356 Mnode * mp;
16357 Mnode * nmp;
16358 int align64 = 0;
16359
16360 if (ARM_DOUBLEWORD_ALIGN)
16361 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16362 if (mp->refcount > 0 && mp->fix_size >= 8)
16363 {
16364 align64 = 1;
16365 break;
16366 }
16367
16368 if (dump_file)
16369 fprintf (dump_file,
16370 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16371 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
16372
16373 scan = emit_label_after (gen_label_rtx (), scan);
16374 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16375 scan = emit_label_after (minipool_vector_label, scan);
16376
16377 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16378 {
16379 if (mp->refcount > 0)
16380 {
16381 if (dump_file)
16382 {
16383 fprintf (dump_file,
16384 ";; Offset %u, min %ld, max %ld ",
16385 (unsigned) mp->offset, (long) mp->min_address,
16386 (long) mp->max_address);
16387 arm_print_value (dump_file, mp->value);
16388 fputc ('\n', dump_file);
16389 }
16390
16391 rtx val = copy_rtx (mp->value);
16392
16393 switch (GET_MODE_SIZE (mp->mode))
16394 {
16395 #ifdef HAVE_consttable_1
16396 case 1:
16397 scan = emit_insn_after (gen_consttable_1 (val), scan);
16398 break;
16399
16400 #endif
16401 #ifdef HAVE_consttable_2
16402 case 2:
16403 scan = emit_insn_after (gen_consttable_2 (val), scan);
16404 break;
16405
16406 #endif
16407 #ifdef HAVE_consttable_4
16408 case 4:
16409 scan = emit_insn_after (gen_consttable_4 (val), scan);
16410 break;
16411
16412 #endif
16413 #ifdef HAVE_consttable_8
16414 case 8:
16415 scan = emit_insn_after (gen_consttable_8 (val), scan);
16416 break;
16417
16418 #endif
16419 #ifdef HAVE_consttable_16
16420 case 16:
16421 scan = emit_insn_after (gen_consttable_16 (val), scan);
16422 break;
16423
16424 #endif
16425 default:
16426 gcc_unreachable ();
16427 }
16428 }
16429
16430 nmp = mp->next;
16431 free (mp);
16432 }
16433
16434 minipool_vector_head = minipool_vector_tail = NULL;
16435 scan = emit_insn_after (gen_consttable_end (), scan);
16436 scan = emit_barrier_after (scan);
16437 }
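/* The emitted pool therefore looks roughly like this (illustrative
   sketch only, label name hypothetical):

	.p2align 2		@ or 3 when an 8-byte entry is present
   .LCPI:			@ minipool_vector_label
	.word	<constant>	@ one consttable_<N> entry per live node
	...			@ consttable_end marker, then a barrier  */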
16438
16439 /* Return the cost of forcibly inserting a barrier after INSN. */
16440 static int
16441 arm_barrier_cost (rtx_insn *insn)
16442 {
16443 /* Basing the location of the pool on the loop depth is preferable,
16444 but at the moment, the basic block information seems to be
16445 corrupted by this stage of the compilation. */
16446 int base_cost = 50;
16447 rtx_insn *next = next_nonnote_insn (insn);
16448
16449 if (next != NULL && LABEL_P (next))
16450 base_cost -= 20;
16451
16452 switch (GET_CODE (insn))
16453 {
16454 case CODE_LABEL:
16455 /* It will always be better to place the table before the label, rather
16456 than after it. */
16457 return 50;
16458
16459 case INSN:
16460 case CALL_INSN:
16461 return base_cost;
16462
16463 case JUMP_INSN:
16464 return base_cost - 10;
16465
16466 default:
16467 return base_cost + 10;
16468 }
16469 }
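/* So a plain insn or call costs 50 (30 when the next non-note insn is a
   label), a jump costs 10 less than that, and a label itself always costs
   the full 50, since the pool should go before the label rather than
   after it. */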
16470
16471 /* Find the best place in the insn stream in the range
16472 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16473 Create the barrier by inserting a jump and add a new fix entry for
16474 it. */
16475 static Mfix *
16476 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16477 {
16478 HOST_WIDE_INT count = 0;
16479 rtx_barrier *barrier;
16480 rtx_insn *from = fix->insn;
16481 /* The instruction after which we will insert the jump. */
16482 rtx_insn *selected = NULL;
16483 int selected_cost;
16484 /* The address at which the jump instruction will be placed. */
16485 HOST_WIDE_INT selected_address;
16486 Mfix * new_fix;
16487 HOST_WIDE_INT max_count = max_address - fix->address;
16488 rtx_code_label *label = gen_label_rtx ();
16489
16490 selected_cost = arm_barrier_cost (from);
16491 selected_address = fix->address;
16492
16493 while (from && count < max_count)
16494 {
16495 rtx_jump_table_data *tmp;
16496 int new_cost;
16497
16498 /* This code shouldn't have been called if there was a natural barrier
16499 within range. */
16500 gcc_assert (!BARRIER_P (from));
16501
16502 /* Count the length of this insn. This must stay in sync with the
16503 code that pushes minipool fixes. */
16504 if (LABEL_P (from))
16505 count += get_label_padding (from);
16506 else
16507 count += get_attr_length (from);
16508
16509 /* If there is a jump table, add its length. */
16510 if (tablejump_p (from, NULL, &tmp))
16511 {
16512 count += get_jump_table_size (tmp);
16513
16514 /* Jump tables aren't in a basic block, so base the cost on
16515 the dispatch insn. If we select this location, we will
16516 still put the pool after the table. */
16517 new_cost = arm_barrier_cost (from);
16518
16519 if (count < max_count
16520 && (!selected || new_cost <= selected_cost))
16521 {
16522 selected = tmp;
16523 selected_cost = new_cost;
16524 selected_address = fix->address + count;
16525 }
16526
16527 /* Continue after the dispatch table. */
16528 from = NEXT_INSN (tmp);
16529 continue;
16530 }
16531
16532 new_cost = arm_barrier_cost (from);
16533
16534 if (count < max_count
16535 && (!selected || new_cost <= selected_cost))
16536 {
16537 selected = from;
16538 selected_cost = new_cost;
16539 selected_address = fix->address + count;
16540 }
16541
16542 from = NEXT_INSN (from);
16543 }
16544
16545 /* Make sure that we found a place to insert the jump. */
16546 gcc_assert (selected);
16547
16548 /* Create a new JUMP_INSN that branches around a barrier. */
16549 from = emit_jump_insn_after (gen_jump (label), selected);
16550 JUMP_LABEL (from) = label;
16551 barrier = emit_barrier_after (from);
16552 emit_label_after (label, barrier);
16553
16554 /* Create a minipool barrier entry for the new barrier. */
16555 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16556 new_fix->insn = barrier;
16557 new_fix->address = selected_address;
16558 new_fix->next = fix->next;
16559 fix->next = new_fix;
16560
16561 return new_fix;
16562 }
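/* The net effect is a sequence along the lines of (illustrative):

	b	.LSKIP		@ the newly created jump
				@ barrier: the pool will be dumped here
   .LSKIP:

   giving dump_minipool a place where the constants can never be executed
   as code. */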
16563
16564 /* Record that there is a natural barrier in the insn stream at
16565 ADDRESS. */
16566 static void
16567 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16568 {
16569 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16570
16571 fix->insn = insn;
16572 fix->address = address;
16573
16574 fix->next = NULL;
16575 if (minipool_fix_head != NULL)
16576 minipool_fix_tail->next = fix;
16577 else
16578 minipool_fix_head = fix;
16579
16580 minipool_fix_tail = fix;
16581 }
16582
16583 /* Record INSN, which will need fixing up to load a value from the
16584 minipool. ADDRESS is the offset of the insn since the start of the
16585 function; LOC is a pointer to the part of the insn which requires
16586 fixing; VALUE is the constant that must be loaded, which is of type
16587 MODE. */
16588 static void
16589 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16590 machine_mode mode, rtx value)
16591 {
16592 gcc_assert (!arm_disable_literal_pool);
16593 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16594
16595 fix->insn = insn;
16596 fix->address = address;
16597 fix->loc = loc;
16598 fix->mode = mode;
16599 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16600 fix->value = value;
16601 fix->forwards = get_attr_pool_range (insn);
16602 fix->backwards = get_attr_neg_pool_range (insn);
16603 fix->minipool = NULL;
16604
16605 /* If an insn doesn't have a range defined for it, then it isn't
16606 expecting to be reworked by this code. Better to stop now than
16607 to generate duff assembly code. */
16608 gcc_assert (fix->forwards || fix->backwards);
16609
16610 /* If an entry requires 8-byte alignment then assume all constant pools
16611 require 4 bytes of padding. Trying to do this later on a per-pool
16612 basis is awkward because existing pool entries have to be modified. */
16613 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16614 minipool_pad = 4;
16615
16616 if (dump_file)
16617 {
16618 fprintf (dump_file,
16619 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16620 GET_MODE_NAME (mode),
16621 INSN_UID (insn), (unsigned long) address,
16622 -1 * (long)fix->backwards, (long)fix->forwards);
16623 arm_print_value (dump_file, fix->value);
16624 fprintf (dump_file, "\n");
16625 }
16626
16627 /* Add it to the chain of fixes. */
16628 fix->next = NULL;
16629
16630 if (minipool_fix_head != NULL)
16631 minipool_fix_tail->next = fix;
16632 else
16633 minipool_fix_head = fix;
16634
16635 minipool_fix_tail = fix;
16636 }
16637
16638 /* Return the maximum number of insns we are prepared to spend on
16639 synthesizing a 64-bit constant inline; callers compare the result of
16640 arm_const_double_inline_cost against this limit. */
16641 int
16642 arm_max_const_double_inline_cost ()
16643 {
16644 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16645 }
16646
16647 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16648 Returns the number of insns needed, or 99 if we don't know how to
16649 do it. */
16650 int
16651 arm_const_double_inline_cost (rtx val)
16652 {
16653 rtx lowpart, highpart;
16654 machine_mode mode;
16655
16656 mode = GET_MODE (val);
16657
16658 if (mode == VOIDmode)
16659 mode = DImode;
16660
16661 gcc_assert (GET_MODE_SIZE (mode) == 8);
16662
16663 lowpart = gen_lowpart (SImode, val);
16664 highpart = gen_highpart_mode (SImode, mode, val);
16665
16666 gcc_assert (CONST_INT_P (lowpart));
16667 gcc_assert (CONST_INT_P (highpart));
16668
16669 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16670 NULL_RTX, NULL_RTX, 0, 0)
16671 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16672 NULL_RTX, NULL_RTX, 0, 0));
16673 }
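/* As a worked example, the constant 0x0000000100000001 costs 1 + 1 = 2
   insns because each 32-bit half is a valid immediate, which is within
   the limit returned by arm_max_const_double_inline_cost above; a value
   whose halves each need several MOV/ORR steps can exceed that limit and
   is then better loaded from the literal pool. */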
16674
16675 /* Cost of loading a SImode constant. */
16676 static inline int
16677 arm_const_inline_cost (enum rtx_code code, rtx val)
16678 {
16679 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16680 NULL_RTX, NULL_RTX, 1, 0);
16681 }
16682
16683 /* Return true if it is worthwhile to split a 64-bit constant into two
16684 32-bit operations. This is the case if optimizing for size, or
16685 if we have load delay slots, or if one 32-bit part can be done with
16686 a single data operation. */
16687 bool
16688 arm_const_double_by_parts (rtx val)
16689 {
16690 machine_mode mode = GET_MODE (val);
16691 rtx part;
16692
16693 if (optimize_size || arm_ld_sched)
16694 return true;
16695
16696 if (mode == VOIDmode)
16697 mode = DImode;
16698
16699 part = gen_highpart_mode (SImode, mode, val);
16700
16701 gcc_assert (CONST_INT_P (part));
16702
16703 if (const_ok_for_arm (INTVAL (part))
16704 || const_ok_for_arm (~INTVAL (part)))
16705 return true;
16706
16707 part = gen_lowpart (SImode, val);
16708
16709 gcc_assert (CONST_INT_P (part));
16710
16711 if (const_ok_for_arm (INTVAL (part))
16712 || const_ok_for_arm (~INTVAL (part)))
16713 return true;
16714
16715 return false;
16716 }
16717
16718 /* Return true if it is possible to inline both the high and low parts
16719 of a 64-bit constant into 32-bit data processing instructions. */
16720 bool
16721 arm_const_double_by_immediates (rtx val)
16722 {
16723 machine_mode mode = GET_MODE (val);
16724 rtx part;
16725
16726 if (mode == VOIDmode)
16727 mode = DImode;
16728
16729 part = gen_highpart_mode (SImode, mode, val);
16730
16731 gcc_assert (CONST_INT_P (part));
16732
16733 if (!const_ok_for_arm (INTVAL (part)))
16734 return false;
16735
16736 part = gen_lowpart (SImode, val);
16737
16738 gcc_assert (CONST_INT_P (part));
16739
16740 if (!const_ok_for_arm (INTVAL (part)))
16741 return false;
16742
16743 return true;
16744 }
16745
16746 /* Scan INSN and note any of its operands that need fixing.
16747 If DO_PUSHES is false we do not actually push any of the fixups
16748 needed. */
16749 static void
16750 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16751 {
16752 int opno;
16753
16754 extract_constrain_insn (insn);
16755
16756 if (recog_data.n_alternatives == 0)
16757 return;
16758
16759 /* Fill in recog_op_alt with information about the constraints of
16760 this insn. */
16761 preprocess_constraints (insn);
16762
16763 const operand_alternative *op_alt = which_op_alt ();
16764 for (opno = 0; opno < recog_data.n_operands; opno++)
16765 {
16766 /* Things we need to fix can only occur in inputs. */
16767 if (recog_data.operand_type[opno] != OP_IN)
16768 continue;
16769
16770 /* If this alternative is a memory reference, then any mention
16771 of constants in this alternative is really to fool reload
16772 into allowing us to accept one there. We need to fix them up
16773 now so that we output the right code. */
16774 if (op_alt[opno].memory_ok)
16775 {
16776 rtx op = recog_data.operand[opno];
16777
16778 if (CONSTANT_P (op))
16779 {
16780 if (do_pushes)
16781 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16782 recog_data.operand_mode[opno], op);
16783 }
16784 else if (MEM_P (op)
16785 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16786 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16787 {
16788 if (do_pushes)
16789 {
16790 rtx cop = avoid_constant_pool_reference (op);
16791
16792 /* Casting the address of something to a mode narrower
16793 than a word can cause avoid_constant_pool_reference()
16794 to return the pool reference itself. That's no good to
16795 us here. Let's just hope that we can use the
16796 constant pool value directly. */
16797 if (op == cop)
16798 cop = get_pool_constant (XEXP (op, 0));
16799
16800 push_minipool_fix (insn, address,
16801 recog_data.operand_loc[opno],
16802 recog_data.operand_mode[opno], cop);
16803 }
16804
16805 }
16806 }
16807 }
16808
16809 return;
16810 }
16811
16812 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16813 and unions in the context of ARMv8-M Security Extensions. It is used as a
16814 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16815 functions. The PADDING_BITS_TO_CLEAR pointer can point to either one
16816 or four masks, depending on whether it is being computed for a
16817 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16818 respectively. The tree for the type of the argument, or of a field within
16819 an argument, is passed in ARG_TYPE. The current register this argument or
16820 field starts in is kept in the pointer REGNO and updated accordingly, the
16821 bit this argument or field starts at is passed in STARTING_BIT, and the
16822 last used bit is kept in LAST_USED_BIT, which is also updated accordingly. */
16823
16824 static unsigned HOST_WIDE_INT
16825 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16826 uint32_t * padding_bits_to_clear,
16827 unsigned starting_bit, int * last_used_bit)
16828
16829 {
16830 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16831
16832 if (TREE_CODE (arg_type) == RECORD_TYPE)
16833 {
16834 unsigned current_bit = starting_bit;
16835 tree field;
16836 long int offset, size;
16837
16838
16839 field = TYPE_FIELDS (arg_type);
16840 while (field)
16841 {
16842 /* The offset within a structure is always an offset from
16843 the start of that structure. Make sure we take that into
16844 account in the calculation of the register-based offset used here. */
16845 offset = starting_bit;
16846 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16847 offset %= 32;
16848
16849 /* This is the actual size of the field, for bitfields this is the
16850 bitfield width and not the container size. */
16851 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16852
16853 if (*last_used_bit != offset)
16854 {
16855 if (offset < *last_used_bit)
16856 {
16857 /* This field's offset is before the 'last_used_bit', that
16858 means this field goes on the next register. So we need to
16859 pad the rest of the current register and increase the
16860 register number. */
16861 uint32_t mask;
16862 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16863 mask++;
16864
16865 padding_bits_to_clear[*regno] |= mask;
16866 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16867 (*regno)++;
16868 }
16869 else
16870 {
16871 /* Otherwise we pad the bits between the last field's end and
16872 the start of the new field. */
16873 uint32_t mask;
16874
16875 mask = ((uint32_t)-1) >> (32 - offset);
16876 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16877 padding_bits_to_clear[*regno] |= mask;
16878 }
16879 current_bit = offset;
16880 }
16881
16882 /* Calculate further padding bits for inner structs/unions too. */
16883 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16884 {
16885 *last_used_bit = current_bit;
16886 not_to_clear_reg_mask
16887 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16888 padding_bits_to_clear, offset,
16889 last_used_bit);
16890 }
16891 else
16892 {
16893 /* Update 'current_bit' with this field's size. If the
16894 'current_bit' lies in a subsequent register, update 'regno' and
16895 reset 'current_bit' to point to the current bit in that new
16896 register. */
16897 current_bit += size;
16898 while (current_bit >= 32)
16899 {
16900 current_bit -= 32;
16901 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16902 (*regno)++;
16903 }
16904 *last_used_bit = current_bit;
16905 }
16906
16907 field = TREE_CHAIN (field);
16908 }
16909 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16910 }
16911 else if (TREE_CODE (arg_type) == UNION_TYPE)
16912 {
16913 tree field, field_t;
16914 int i, regno_t, field_size;
16915 int max_reg = -1;
16916 int max_bit = -1;
16917 uint32_t mask;
16918 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16919 = {-1, -1, -1, -1};
16920
16921 /* To compute the padding bits in a union we only consider bits as
16922 padding bits if, for every field in the union, they are either a
16923 padding bit or fall outside that field's size. */
16924 field = TYPE_FIELDS (arg_type);
16925 while (field)
16926 {
16927 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16928 = {0U, 0U, 0U, 0U};
16929 int last_used_bit_t = *last_used_bit;
16930 regno_t = *regno;
16931 field_t = TREE_TYPE (field);
16932
16933 /* If the field's type is either a record or a union make sure to
16934 compute their padding bits too. */
16935 if (RECORD_OR_UNION_TYPE_P (field_t))
16936 not_to_clear_reg_mask
16937 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16938 &padding_bits_to_clear_t[0],
16939 starting_bit, &last_used_bit_t);
16940 else
16941 {
16942 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16943 regno_t = (field_size / 32) + *regno;
16944 last_used_bit_t = (starting_bit + field_size) % 32;
16945 }
16946
16947 for (i = *regno; i < regno_t; i++)
16948 {
16949 /* For all but the last register used by this field only keep the
16950 padding bits that were padding bits in this field. */
16951 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16952 }
16953
16954 /* For the last register, keep all padding bits that were padding
16955 bits in this field and any padding bits that are still valid
16956 as padding bits but fall outside of this field's size. */
16957 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16958 padding_bits_to_clear_res[regno_t]
16959 &= padding_bits_to_clear_t[regno_t] | mask;
16960
16961 /* Update the maximum size of the fields in terms of registers used
16962 ('max_reg') and the 'last_used_bit' in said register. */
16963 if (max_reg < regno_t)
16964 {
16965 max_reg = regno_t;
16966 max_bit = last_used_bit_t;
16967 }
16968 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16969 max_bit = last_used_bit_t;
16970
16971 field = TREE_CHAIN (field);
16972 }
16973
16974 /* Update the current padding_bits_to_clear using the intersection of the
16975 padding bits of all the fields. */
16976 for (i = *regno; i < max_reg; i++)
16977 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16978
16979 /* Do not keep trailing padding bits; we do not know yet whether this
16980 is the end of the argument. */
16981 mask = ((uint32_t) 1 << max_bit) - 1;
16982 padding_bits_to_clear[max_reg]
16983 |= padding_bits_to_clear_res[max_reg] & mask;
16984
16985 *regno = max_reg;
16986 *last_used_bit = max_bit;
16987 }
16988 else
16989 /* This function should only be used for structs and unions. */
16990 gcc_unreachable ();
16991
16992 return not_to_clear_reg_mask;
16993 }
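/* Worked example (illustrative): for  struct { char a; short b; }
   passed in r0, field A occupies bits 0-7 and field B starts at bit 16,
   so when B is processed offset = 16 and *last_used_bit = 8, giving
   mask = (0xffffffff >> 16) - ((1 << 8) - 1) = 0xff00; bits 8-15 of r0
   are thus recorded in padding_bits_to_clear[0] as padding that must be
   cleared. */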
16994
16995 /* In the context of ARMv8-M Security Extensions, this function is used for both
16996 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16997 registers are used when returning or passing arguments, which is then
16998 returned as a mask. It will also compute a mask to indicate padding/unused
16999 bits for each of these registers, and passes this through the
17000 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17001 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17002 the starting register used to pass this argument or return value is passed
17003 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17004 for struct and union types. */
17005
17006 static unsigned HOST_WIDE_INT
17007 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17008 uint32_t * padding_bits_to_clear)
17009
17010 {
17011 int last_used_bit = 0;
17012 unsigned HOST_WIDE_INT not_to_clear_mask;
17013
17014 if (RECORD_OR_UNION_TYPE_P (arg_type))
17015 {
17016 not_to_clear_mask
17017 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17018 padding_bits_to_clear, 0,
17019 &last_used_bit);
17020
17021
17022 /* If the 'last_used_bit' is not zero, that means we are still using a
17023 part of the last 'regno'. In such cases we must clear the trailing
17024 bits. Otherwise we are not using regno at all and should mark it to
17025 be cleared. */
17026 if (last_used_bit != 0)
17027 padding_bits_to_clear[regno]
17028 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17029 else
17030 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17031 }
17032 else
17033 {
17034 not_to_clear_mask = 0;
17035 /* We are not dealing with structs or unions, so these arguments may be
17036 passed in floating point registers too. In some cases a BLKmode is
17037 used when returning or passing arguments in multiple VFP registers. */
17038 if (GET_MODE (arg_rtx) == BLKmode)
17039 {
17040 int i, arg_regs;
17041 rtx reg;
17042
17043 /* This should really only occur when dealing with the hard-float
17044 ABI. */
17045 gcc_assert (TARGET_HARD_FLOAT_ABI);
17046
17047 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17048 {
17049 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17050 gcc_assert (REG_P (reg));
17051
17052 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17053
17054 /* If we are dealing with DF mode, make sure we don't
17055 clear either of the registers it addresses. */
17056 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17057 if (arg_regs > 1)
17058 {
17059 unsigned HOST_WIDE_INT mask;
17060 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17061 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17062 not_to_clear_mask |= mask;
17063 }
17064 }
17065 }
17066 else
17067 {
17068 /* Otherwise we can rely on the MODE to determine how many registers
17069 are being used by this argument. */
17070 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17071 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17072 if (arg_regs > 1)
17073 {
17074 unsigned HOST_WIDE_INT
17075 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17076 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17077 not_to_clear_mask |= mask;
17078 }
17079 }
17080 }
17081
17082 return not_to_clear_mask;
17083 }
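/* For instance, assuming the usual core register numbering (r0 is
   register 0), an int argument in r0 produces a mask of 0x1 and a long
   long in r0/r1 produces 0x3, so those registers are excluded from the
   clearing performed before the call. */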
17084
17085 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
17086 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17087 which registers are to be fully cleared, using the value in register
17088 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
17089 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
17090 core registers, with SCRATCH_REG used as a scratch register for that clearing.
17091
17092 NOTE: one of the three following assertions must hold:
17093 - SCRATCH_REG is a low register
17094 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17095 in TO_CLEAR_BITMAP)
17096 - CLEARING_REG is a low register. */
17097
17098 static void
17099 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17100 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17101 {
17102 bool saved_clearing = false;
17103 rtx saved_clearing_reg = NULL_RTX;
17104 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17105
17106 gcc_assert (arm_arch_cmse);
17107
17108 if (!bitmap_empty_p (to_clear_bitmap))
17109 {
17110 minregno = bitmap_first_set_bit (to_clear_bitmap);
17111 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17112 }
17113 clearing_regno = REGNO (clearing_reg);
17114
17115 /* Clear padding bits. */
17116 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17117 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17118 {
17119 uint64_t mask;
17120 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17121
17122 if (padding_bits_to_clear[i] == 0)
17123 continue;
17124
17125 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17126 CLEARING_REG as scratch. */
17127 if (TARGET_THUMB1
17128 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17129 {
17130 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17131 such that we can use clearing_reg to clear the unused bits in the
17132 arguments. */
17133 if ((clearing_regno > maxregno
17134 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17135 && !saved_clearing)
17136 {
17137 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17138 emit_move_insn (scratch_reg, clearing_reg);
17139 saved_clearing = true;
17140 saved_clearing_reg = scratch_reg;
17141 }
17142 scratch_reg = clearing_reg;
17143 }
17144
17145 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17146 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17147 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17148
17149 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17150 mask = (~padding_bits_to_clear[i]) >> 16;
17151 rtx16 = gen_int_mode (16, SImode);
17152 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17153 if (mask)
17154 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17155
17156 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17157 }
17158 if (saved_clearing)
17159 emit_move_insn (clearing_reg, saved_clearing_reg);
17160
17161
17162 /* Clear full registers. */
17163
17164 /* If not marked for clearing, clearing_reg already does not contain
17165 any secret. */
17166 if (clearing_regno <= maxregno
17167 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17168 {
17169 emit_move_insn (clearing_reg, const0_rtx);
17170 emit_use (clearing_reg);
17171 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17172 }
17173
17174 for (regno = minregno; regno <= maxregno; regno++)
17175 {
17176 if (!bitmap_bit_p (to_clear_bitmap, regno))
17177 continue;
17178
17179 if (IS_VFP_REGNUM (regno))
17180 {
17181 /* If regno is an even vfp register and its successor is also to
17182 be cleared, use vmov. */
17183 if (TARGET_VFP_DOUBLE
17184 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17185 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17186 {
17187 emit_move_insn (gen_rtx_REG (DFmode, regno),
17188 CONST1_RTX (DFmode));
17189 emit_use (gen_rtx_REG (DFmode, regno));
17190 regno++;
17191 }
17192 else
17193 {
17194 emit_move_insn (gen_rtx_REG (SFmode, regno),
17195 CONST1_RTX (SFmode));
17196 emit_use (gen_rtx_REG (SFmode, regno));
17197 }
17198 }
17199 else
17200 {
17201 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17202 emit_use (gen_rtx_REG (SImode, regno));
17203 }
17204 }
17205 }
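/* Example of the padding path above: if padding_bits_to_clear[i] is
   0xff00 for r0, the scratch register is loaded with ~0xff00 = 0xffff00ff
   (low half by a move, high half by a 16-bit insert) and r0 is then ANDed
   with it, wiping only the padding bits while preserving the argument
   bits. */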
17206
17207 /* Clears caller-saved registers not used to pass arguments before a
17208 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17209 registers is done in the __gnu_cmse_nonsecure_call libcall.
17210 See libgcc/config/arm/cmse_nonsecure_call.S. */
17211
17212 static void
17213 cmse_nonsecure_call_clear_caller_saved (void)
17214 {
17215 basic_block bb;
17216
17217 FOR_EACH_BB_FN (bb, cfun)
17218 {
17219 rtx_insn *insn;
17220
17221 FOR_BB_INSNS (bb, insn)
17222 {
17223 unsigned address_regnum, regno, maxregno =
17224 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17225 auto_sbitmap to_clear_bitmap (maxregno + 1);
17226 rtx_insn *seq;
17227 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17228 rtx address;
17229 CUMULATIVE_ARGS args_so_far_v;
17230 cumulative_args_t args_so_far;
17231 tree arg_type, fntype;
17232 bool first_param = true;
17233 function_args_iterator args_iter;
17234 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17235
17236 if (!NONDEBUG_INSN_P (insn))
17237 continue;
17238
17239 if (!CALL_P (insn))
17240 continue;
17241
17242 pat = PATTERN (insn);
17243 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17244 call = XVECEXP (pat, 0, 0);
17245
17246 /* Get the real call RTX if the insn sets a value, ie. returns. */
17247 if (GET_CODE (call) == SET)
17248 call = SET_SRC (call);
17249
17250 /* Check if it is a cmse_nonsecure_call. */
17251 unspec = XEXP (call, 0);
17252 if (GET_CODE (unspec) != UNSPEC
17253 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17254 continue;
17255
17256 /* Determine the caller-saved registers we need to clear. */
17257 bitmap_clear (to_clear_bitmap);
17258 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17259
17260 /* Only look at the caller-saved floating point registers in case of
17261 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17262 lazy stores and loads, which clear both caller- and callee-saved
17263 registers. */
17264 if (TARGET_HARD_FLOAT_ABI)
17265 {
17266 auto_sbitmap float_bitmap (maxregno + 1);
17267
17268 bitmap_clear (float_bitmap);
17269 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17270 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17271 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17272 }
17273
17274 /* Make sure the register used to hold the function address is not
17275 cleared. */
17276 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17277 gcc_assert (MEM_P (address));
17278 gcc_assert (REG_P (XEXP (address, 0)));
17279 address_regnum = REGNO (XEXP (address, 0));
17280 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17281 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17282
17283 /* Set basic block of call insn so that df rescan is performed on
17284 insns inserted here. */
17285 set_block_for_insn (insn, bb);
17286 df_set_flags (DF_DEFER_INSN_RESCAN);
17287 start_sequence ();
17288
17289 /* Make sure the scheduler doesn't schedule other insns beyond
17290 here. */
17291 emit_insn (gen_blockage ());
17292
17293 /* Walk through all arguments and clear registers appropriately. */
17295 fntype = TREE_TYPE (MEM_EXPR (address));
17296 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17297 NULL_TREE);
17298 args_so_far = pack_cumulative_args (&args_so_far_v);
17299 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17300 {
17301 rtx arg_rtx;
17302 uint64_t to_clear_args_mask;
17303 machine_mode arg_mode = TYPE_MODE (arg_type);
17304
17305 if (VOID_TYPE_P (arg_type))
17306 continue;
17307
17308 if (!first_param)
17309 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17310 true);
17311
17312 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17313 true);
17314 gcc_assert (REG_P (arg_rtx));
17315 to_clear_args_mask
17316 = compute_not_to_clear_mask (arg_type, arg_rtx,
17317 REGNO (arg_rtx),
17318 &padding_bits_to_clear[0]);
17319 if (to_clear_args_mask)
17320 {
17321 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17322 {
17323 if (to_clear_args_mask & (1ULL << regno))
17324 bitmap_clear_bit (to_clear_bitmap, regno);
17325 }
17326 }
17327
17328 first_param = false;
17329 }
17330
17331 /* We use right shift and left shift to clear the LSB of the address
17332 we jump to instead of using bic, to avoid having to use an extra
17333 register on Thumb-1. */
17334 clearing_reg = XEXP (address, 0);
17335 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17336 emit_insn (gen_rtx_SET (clearing_reg, shift));
17337 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17338 emit_insn (gen_rtx_SET (clearing_reg, shift));
17339
17340 /* Clear caller-saved registers that could leak secrets before doing a
17341 non-secure call. */
17342 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17343 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17344 NUM_ARG_REGS, ip_reg, clearing_reg);
17345
17346 seq = get_insns ();
17347 end_sequence ();
17348 emit_insn_before (seq, insn);
17349 }
17350 }
17351 }
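/* Illustrative summary of the sequence inserted before such a call: the
   register holding the target address has its LSB cleared with a logical
   right/left shift pair, the padding bits of partially used argument
   registers are masked using ip as scratch, and every remaining
   caller-saved register that must be cleared is overwritten by
   cmse_clear_registers, all ahead of the actual call insn. */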
17352
17353 /* Rewrite move insn into subtract of 0 if the condition codes will
17354 be useful in the next conditional jump insn. */
17355
17356 static void
17357 thumb1_reorg (void)
17358 {
17359 basic_block bb;
17360
17361 FOR_EACH_BB_FN (bb, cfun)
17362 {
17363 rtx dest, src;
17364 rtx cmp, op0, op1, set = NULL;
17365 rtx_insn *prev, *insn = BB_END (bb);
17366 bool insn_clobbered = false;
17367
17368 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17369 insn = PREV_INSN (insn);
17370
17371 /* Find the last cbranchsi4_insn in basic block BB. */
17372 if (insn == BB_HEAD (bb)
17373 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17374 continue;
17375
17376 /* Get the register with which we are comparing. */
17377 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17378 op0 = XEXP (cmp, 0);
17379 op1 = XEXP (cmp, 1);
17380
17381 /* Check that comparison is against ZERO. */
17382 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17383 continue;
17384
17385 /* Find the first flag setting insn before INSN in basic block BB. */
17386 gcc_assert (insn != BB_HEAD (bb));
17387 for (prev = PREV_INSN (insn);
17388 (!insn_clobbered
17389 && prev != BB_HEAD (bb)
17390 && (NOTE_P (prev)
17391 || DEBUG_INSN_P (prev)
17392 || ((set = single_set (prev)) != NULL
17393 && get_attr_conds (prev) == CONDS_NOCOND)));
17394 prev = PREV_INSN (prev))
17395 {
17396 if (reg_set_p (op0, prev))
17397 insn_clobbered = true;
17398 }
17399
17400 /* Skip if op0 is clobbered by insn other than prev. */
17401 if (insn_clobbered)
17402 continue;
17403
17404 if (!set)
17405 continue;
17406
17407 dest = SET_DEST (set);
17408 src = SET_SRC (set);
17409 if (!low_register_operand (dest, SImode)
17410 || !low_register_operand (src, SImode))
17411 continue;
17412
17413 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17414 in INSN. Both src and dest of the move insn are checked. */
17415 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17416 {
17417 dest = copy_rtx (dest);
17418 src = copy_rtx (src);
17419 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17420 PATTERN (prev) = gen_rtx_SET (dest, src);
17421 INSN_CODE (prev) = -1;
17422 /* Set test register in INSN to dest. */
17423 XEXP (cmp, 0) = copy_rtx (dest);
17424 INSN_CODE (insn) = -1;
17425 }
17426 }
17427 }
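/* For example (illustrative), given

	movs	r2, r3
	...
	cmp	r3, #0
	bne	.L1

   the move is rewritten as "subs r2, r3, #0", which sets the condition
   codes itself, and the branch is retargeted to test r2, so the explicit
   compare against zero can typically be dropped when the
   compare-and-branch pattern is output. */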
17428
17429 /* Convert instructions to their cc-clobbering variant if possible, since
17430 that allows us to use smaller encodings. */
17431
17432 static void
17433 thumb2_reorg (void)
17434 {
17435 basic_block bb;
17436 regset_head live;
17437
17438 INIT_REG_SET (&live);
17439
17440 /* We are freeing block_for_insn in the toplev to keep compatibility
17441 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17442 compute_bb_for_insn ();
17443 df_analyze ();
17444
17445 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17446
17447 FOR_EACH_BB_FN (bb, cfun)
17448 {
17449 if ((current_tune->disparage_flag_setting_t16_encodings
17450 == tune_params::DISPARAGE_FLAGS_ALL)
17451 && optimize_bb_for_speed_p (bb))
17452 continue;
17453
17454 rtx_insn *insn;
17455 Convert_Action action = SKIP;
17456 Convert_Action action_for_partial_flag_setting
17457 = ((current_tune->disparage_flag_setting_t16_encodings
17458 != tune_params::DISPARAGE_FLAGS_NEITHER)
17459 && optimize_bb_for_speed_p (bb))
17460 ? SKIP : CONV;
17461
17462 COPY_REG_SET (&live, DF_LR_OUT (bb));
17463 df_simulate_initialize_backwards (bb, &live);
17464 FOR_BB_INSNS_REVERSE (bb, insn)
17465 {
17466 if (NONJUMP_INSN_P (insn)
17467 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17468 && GET_CODE (PATTERN (insn)) == SET)
17469 {
17470 action = SKIP;
17471 rtx pat = PATTERN (insn);
17472 rtx dst = XEXP (pat, 0);
17473 rtx src = XEXP (pat, 1);
17474 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17475
17476 if (UNARY_P (src) || BINARY_P (src))
17477 op0 = XEXP (src, 0);
17478
17479 if (BINARY_P (src))
17480 op1 = XEXP (src, 1);
17481
17482 if (low_register_operand (dst, SImode))
17483 {
17484 switch (GET_CODE (src))
17485 {
17486 case PLUS:
17487 /* Adding two registers and storing the result
17488 in the first source is already a 16-bit
17489 operation. */
17490 if (rtx_equal_p (dst, op0)
17491 && register_operand (op1, SImode))
17492 break;
17493
17494 if (low_register_operand (op0, SImode))
17495 {
17496 /* ADDS <Rd>,<Rn>,<Rm> */
17497 if (low_register_operand (op1, SImode))
17498 action = CONV;
17499 /* ADDS <Rdn>,#<imm8> */
17500 /* SUBS <Rdn>,#<imm8> */
17501 else if (rtx_equal_p (dst, op0)
17502 && CONST_INT_P (op1)
17503 && IN_RANGE (INTVAL (op1), -255, 255))
17504 action = CONV;
17505 /* ADDS <Rd>,<Rn>,#<imm3> */
17506 /* SUBS <Rd>,<Rn>,#<imm3> */
17507 else if (CONST_INT_P (op1)
17508 && IN_RANGE (INTVAL (op1), -7, 7))
17509 action = CONV;
17510 }
17511 /* ADCS <Rd>, <Rn> */
17512 else if (GET_CODE (XEXP (src, 0)) == PLUS
17513 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17514 && low_register_operand (XEXP (XEXP (src, 0), 1),
17515 SImode)
17516 && COMPARISON_P (op1)
17517 && cc_register (XEXP (op1, 0), VOIDmode)
17518 && maybe_get_arm_condition_code (op1) == ARM_CS
17519 && XEXP (op1, 1) == const0_rtx)
17520 action = CONV;
17521 break;
17522
17523 case MINUS:
17524 /* RSBS <Rd>,<Rn>,#0
17525 Not handled here: see NEG below. */
17526 /* SUBS <Rd>,<Rn>,#<imm3>
17527 SUBS <Rdn>,#<imm8>
17528 Not handled here: see PLUS above. */
17529 /* SUBS <Rd>,<Rn>,<Rm> */
17530 if (low_register_operand (op0, SImode)
17531 && low_register_operand (op1, SImode))
17532 action = CONV;
17533 break;
17534
17535 case MULT:
17536 /* MULS <Rdm>,<Rn>,<Rdm>
17537 As an exception to the rule, this is only used
17538 when optimizing for size since MULS is slow on all
17539 known implementations. We do not even want to use
17540 MULS in cold code, if optimizing for speed, so we
17541 test the global flag here. */
17542 if (!optimize_size)
17543 break;
17544 /* Fall through. */
17545 case AND:
17546 case IOR:
17547 case XOR:
17548 /* ANDS <Rdn>,<Rm> */
17549 if (rtx_equal_p (dst, op0)
17550 && low_register_operand (op1, SImode))
17551 action = action_for_partial_flag_setting;
17552 else if (rtx_equal_p (dst, op1)
17553 && low_register_operand (op0, SImode))
17554 action = action_for_partial_flag_setting == SKIP
17555 ? SKIP : SWAP_CONV;
17556 break;
17557
17558 case ASHIFTRT:
17559 case ASHIFT:
17560 case LSHIFTRT:
17561 /* ASRS <Rdn>,<Rm> */
17562 /* LSRS <Rdn>,<Rm> */
17563 /* LSLS <Rdn>,<Rm> */
17564 if (rtx_equal_p (dst, op0)
17565 && low_register_operand (op1, SImode))
17566 action = action_for_partial_flag_setting;
17567 /* ASRS <Rd>,<Rm>,#<imm5> */
17568 /* LSRS <Rd>,<Rm>,#<imm5> */
17569 /* LSLS <Rd>,<Rm>,#<imm5> */
17570 else if (low_register_operand (op0, SImode)
17571 && CONST_INT_P (op1)
17572 && IN_RANGE (INTVAL (op1), 0, 31))
17573 action = action_for_partial_flag_setting;
17574 break;
17575
17576 case ROTATERT:
17577 /* RORS <Rdn>,<Rm> */
17578 if (rtx_equal_p (dst, op0)
17579 && low_register_operand (op1, SImode))
17580 action = action_for_partial_flag_setting;
17581 break;
17582
17583 case NOT:
17584 /* MVNS <Rd>,<Rm> */
17585 if (low_register_operand (op0, SImode))
17586 action = action_for_partial_flag_setting;
17587 break;
17588
17589 case NEG:
17590 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17591 if (low_register_operand (op0, SImode))
17592 action = CONV;
17593 break;
17594
17595 case CONST_INT:
17596 /* MOVS <Rd>,#<imm8> */
17597 if (CONST_INT_P (src)
17598 && IN_RANGE (INTVAL (src), 0, 255))
17599 action = action_for_partial_flag_setting;
17600 break;
17601
17602 case REG:
17603 /* MOVS and MOV<c> with registers have different
17604 encodings, so are not relevant here. */
17605 break;
17606
17607 default:
17608 break;
17609 }
17610 }
17611
17612 if (action != SKIP)
17613 {
17614 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17615 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17616 rtvec vec;
17617
17618 if (action == SWAP_CONV)
17619 {
17620 src = copy_rtx (src);
17621 XEXP (src, 0) = op1;
17622 XEXP (src, 1) = op0;
17623 pat = gen_rtx_SET (dst, src);
17624 vec = gen_rtvec (2, pat, clobber);
17625 }
17626 else /* action == CONV */
17627 vec = gen_rtvec (2, pat, clobber);
17628
17629 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17630 INSN_CODE (insn) = -1;
17631 }
17632 }
17633
17634 if (NONDEBUG_INSN_P (insn))
17635 df_simulate_one_insn_backwards (bb, insn, &live);
17636 }
17637 }
17638
17639 CLEAR_REG_SET (&live);
17640 }
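/* For example, "add r2, r0, r1" in a block where the condition codes are
   dead becomes "adds r2, r0, r1", which has a 16-bit encoding; SWAP_CONV
   additionally commutes the operands of AND/IOR/XOR/MULS so that the
   destination matches the first source, as the 16-bit flag-setting forms
   require. */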
17641
17642 /* GCC puts the pool in the wrong place for ARM, since we can only
17643 load addresses a limited distance around the pc. We do some
17644 special munging to move the constant pool values to the correct
17645 point in the code. */
17646 static void
17647 arm_reorg (void)
17648 {
17649 rtx_insn *insn;
17650 HOST_WIDE_INT address = 0;
17651 Mfix * fix;
17652
17653 if (use_cmse)
17654 cmse_nonsecure_call_clear_caller_saved ();
17655 if (TARGET_THUMB1)
17656 thumb1_reorg ();
17657 else if (TARGET_THUMB2)
17658 thumb2_reorg ();
17659
17660 /* Ensure all insns that must be split have been split at this point.
17661 Otherwise, the pool placement code below may compute incorrect
17662 insn lengths. Note that when optimizing, all insns have already
17663 been split at this point. */
17664 if (!optimize)
17665 split_all_insns_noflow ();
17666
17667 /* When the literal pool has been disabled there should be no need to
17668 create one, so make sure we never even try. */
17669 if (arm_disable_literal_pool)
17670 return;
17671
17672 minipool_fix_head = minipool_fix_tail = NULL;
17673
17674 /* The first insn must always be a note, or the code below won't
17675 scan it properly. */
17676 insn = get_insns ();
17677 gcc_assert (NOTE_P (insn));
17678 minipool_pad = 0;
17679
17680 /* Scan all the insns and record the operands that will need fixing. */
17681 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17682 {
17683 if (BARRIER_P (insn))
17684 push_minipool_barrier (insn, address);
17685 else if (INSN_P (insn))
17686 {
17687 rtx_jump_table_data *table;
17688
17689 note_invalid_constants (insn, address, true);
17690 address += get_attr_length (insn);
17691
17692 /* If the insn is a vector jump, add the size of the table
17693 and skip the table. */
17694 if (tablejump_p (insn, NULL, &table))
17695 {
17696 address += get_jump_table_size (table);
17697 insn = table;
17698 }
17699 }
17700 else if (LABEL_P (insn))
17701 /* Add the worst-case padding due to alignment. We don't add
17702 the _current_ padding because the minipool insertions
17703 themselves might change it. */
17704 address += get_label_padding (insn);
17705 }
17706
17707 fix = minipool_fix_head;
17708
17709 /* Now scan the fixups and perform the required changes. */
17710 while (fix)
17711 {
17712 Mfix * ftmp;
17713 Mfix * fdel;
17714 Mfix * last_added_fix;
17715 Mfix * last_barrier = NULL;
17716 Mfix * this_fix;
17717
17718 /* Skip any further barriers before the next fix. */
17719 while (fix && BARRIER_P (fix->insn))
17720 fix = fix->next;
17721
17722 /* No more fixes. */
17723 if (fix == NULL)
17724 break;
17725
17726 last_added_fix = NULL;
17727
17728 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17729 {
17730 if (BARRIER_P (ftmp->insn))
17731 {
17732 if (ftmp->address >= minipool_vector_head->max_address)
17733 break;
17734
17735 last_barrier = ftmp;
17736 }
17737 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17738 break;
17739
17740 last_added_fix = ftmp; /* Keep track of the last fix added. */
17741 }
17742
17743 /* If we found a barrier, drop back to that; any fixes that we
17744 could have reached but come after the barrier will now go in
17745 the next mini-pool. */
17746 if (last_barrier != NULL)
17747 {
17748 /* Reduce the refcount for those fixes that won't go into this
17749 pool after all. */
17750 for (fdel = last_barrier->next;
17751 fdel && fdel != ftmp;
17752 fdel = fdel->next)
17753 {
17754 fdel->minipool->refcount--;
17755 fdel->minipool = NULL;
17756 }
17757
17758 ftmp = last_barrier;
17759 }
17760 else
17761 {
17762 /* ftmp is the first fix that we can't fit into this pool and
17763 there are no natural barriers that we could use. Insert a
17764 new barrier in the code somewhere between the previous
17765 fix and this one, and arrange to jump around it. */
17766 HOST_WIDE_INT max_address;
17767
17768 /* The last item on the list of fixes must be a barrier, so
17769 we can never run off the end of the list of fixes without
17770 last_barrier being set. */
17771 gcc_assert (ftmp);
17772
17773 max_address = minipool_vector_head->max_address;
17774 /* Check that there isn't another fix that is in range that
17775 we couldn't fit into this pool because the pool was
17776 already too large: we need to put the pool before such an
17777 instruction. The pool itself may come just after the
17778 fix because create_fix_barrier also allows space for a
17779 jump instruction. */
17780 if (ftmp->address < max_address)
17781 max_address = ftmp->address + 1;
17782
17783 last_barrier = create_fix_barrier (last_added_fix, max_address);
17784 }
17785
17786 assign_minipool_offsets (last_barrier);
17787
17788 while (ftmp)
17789 {
17790 if (!BARRIER_P (ftmp->insn)
17791 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17792 == NULL))
17793 break;
17794
17795 ftmp = ftmp->next;
17796 }
17797
17798 /* Scan over the fixes we have identified for this pool, fixing them
17799 up and adding the constants to the pool itself. */
17800 for (this_fix = fix; this_fix && ftmp != this_fix;
17801 this_fix = this_fix->next)
17802 if (!BARRIER_P (this_fix->insn))
17803 {
17804 rtx addr
17805 = plus_constant (Pmode,
17806 gen_rtx_LABEL_REF (VOIDmode,
17807 minipool_vector_label),
17808 this_fix->minipool->offset);
17809 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17810 }
17811
17812 dump_minipool (last_barrier->insn);
17813 fix = ftmp;
17814 }
17815
17816 /* From now on we must synthesize any constants that we can't handle
17817 directly. This can happen if the RTL gets split during final
17818 instruction generation. */
17819 cfun->machine->after_arm_reorg = 1;
17820
17821 /* Free the minipool memory. */
17822 obstack_free (&minipool_obstack, minipool_startobj);
17823 }
17824 \f
17825 /* Routines to output assembly language. */
17826
17827 /* Return string representation of passed in real value. */
17828 static const char *
17829 fp_const_from_val (REAL_VALUE_TYPE *r)
17830 {
17831 if (!fp_consts_inited)
17832 init_fp_table ();
17833
17834 gcc_assert (real_equal (r, &value_fp0));
17835 return "0";
17836 }
17837
17838 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17839 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17840 insn is in the list, and UPDATE is true iff the list contains an
17841 explicit update of the base register. */
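/* For example, a conditional pop of r4, r5 and the PC with an SP update is
   printed as "pop<cond> {r4, r5, pc}", while a return from an interrupt
   handler uses an LDM form with "^" appended so that SPSR is restored along
   with the PC.  */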
17842 void
17843 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17844 bool update)
17845 {
17846 int i;
17847 char pattern[100];
17848 int offset;
17849 const char *conditional;
17850 int num_saves = XVECLEN (operands[0], 0);
17851 unsigned int regno;
17852 unsigned int regno_base = REGNO (operands[1]);
17853 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17854
17855 offset = 0;
17856 offset += update ? 1 : 0;
17857 offset += return_pc ? 1 : 0;
17858
17859 /* Is the base register in the list? */
17860 for (i = offset; i < num_saves; i++)
17861 {
17862 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17863 /* If SP is in the list, then the base register must be SP. */
17864 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17865 /* If base register is in the list, there must be no explicit update. */
17866 if (regno == regno_base)
17867 gcc_assert (!update);
17868 }
17869
17870 conditional = reverse ? "%?%D0" : "%?%d0";
17871 /* Can't use POP if returning from an interrupt. */
17872 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17873 sprintf (pattern, "pop%s\t{", conditional);
17874 else
17875 {
17876 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17877 It's just a convention; their semantics are identical. */
17878 if (regno_base == SP_REGNUM)
17879 sprintf (pattern, "ldmfd%s\t", conditional);
17880 else if (update)
17881 sprintf (pattern, "ldmia%s\t", conditional);
17882 else
17883 sprintf (pattern, "ldm%s\t", conditional);
17884
17885 strcat (pattern, reg_names[regno_base]);
17886 if (update)
17887 strcat (pattern, "!, {");
17888 else
17889 strcat (pattern, ", {");
17890 }
17891
17892 /* Output the first destination register. */
17893 strcat (pattern,
17894 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17895
17896 /* Output the rest of the destination registers. */
17897 for (i = offset + 1; i < num_saves; i++)
17898 {
17899 strcat (pattern, ", ");
17900 strcat (pattern,
17901 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17902 }
17903
17904 strcat (pattern, "}");
17905
17906 if (interrupt_p && return_pc)
17907 strcat (pattern, "^");
17908
17909 output_asm_insn (pattern, &cond);
17910 }
17911
17912
17913 /* Output the assembly for a store multiple. */
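/* For a push to the stack this expands to, e.g., "vpush%?.64 {d8, d9, d10}";
   for any other base register the "vstmdb%?.64 <Rn>!, {...}" form is used.  */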
17914
17915 const char *
17916 vfp_output_vstmd (rtx * operands)
17917 {
17918 char pattern[100];
17919 int p;
17920 int base;
17921 int i;
17922 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17923 ? XEXP (operands[0], 0)
17924 : XEXP (XEXP (operands[0], 0), 0);
17925 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17926
17927 if (push_p)
17928 strcpy (pattern, "vpush%?.64\t{%P1");
17929 else
17930 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17931
17932 p = strlen (pattern);
17933
17934 gcc_assert (REG_P (operands[1]));
17935
17936 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17937 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17938 {
17939 p += sprintf (&pattern[p], ", d%d", base + i);
17940 }
17941 strcpy (&pattern[p], "}");
17942
17943 output_asm_insn (pattern, operands);
17944 return "";
17945 }
17946
17947
17948 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17949 number of bytes pushed. */
17950
17951 static int
17952 vfp_emit_fstmd (int base_reg, int count)
17953 {
17954 rtx par;
17955 rtx dwarf;
17956 rtx tmp, reg;
17957 int i;
17958
17959 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17960 register pairs are stored by a store multiple insn. We avoid this
17961 by pushing an extra pair. */
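/* For example, with !arm_arch6 a request to push the pair {d8, d9} is widened
   to {d8, d9, d10}; if the pair sits at the very top of the register file the
   block is extended downwards instead so that the extra register is still
   valid.  */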
17962 if (count == 2 && !arm_arch6)
17963 {
17964 if (base_reg == LAST_VFP_REGNUM - 3)
17965 base_reg -= 2;
17966 count++;
17967 }
17968
17969 /* FSTMD may not store more than 16 doubleword registers at once. Split
17970 larger stores into multiple parts (up to a maximum of two, in
17971 practice). */
17972 if (count > 16)
17973 {
17974 int saved;
17975 /* NOTE: base_reg is an internal register number, so each D register
17976 counts as 2. */
17977 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17978 saved += vfp_emit_fstmd (base_reg, 16);
17979 return saved;
17980 }
17981
17982 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17983 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17984
17985 reg = gen_rtx_REG (DFmode, base_reg);
17986 base_reg += 2;
17987
17988 XVECEXP (par, 0, 0)
17989 = gen_rtx_SET (gen_frame_mem
17990 (BLKmode,
17991 gen_rtx_PRE_MODIFY (Pmode,
17992 stack_pointer_rtx,
17993 plus_constant
17994 (Pmode, stack_pointer_rtx,
17995 - (count * 8)))
17996 ),
17997 gen_rtx_UNSPEC (BLKmode,
17998 gen_rtvec (1, reg),
17999 UNSPEC_PUSH_MULT));
18000
18001 tmp = gen_rtx_SET (stack_pointer_rtx,
18002 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18003 RTX_FRAME_RELATED_P (tmp) = 1;
18004 XVECEXP (dwarf, 0, 0) = tmp;
18005
18006 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18007 RTX_FRAME_RELATED_P (tmp) = 1;
18008 XVECEXP (dwarf, 0, 1) = tmp;
18009
18010 for (i = 1; i < count; i++)
18011 {
18012 reg = gen_rtx_REG (DFmode, base_reg);
18013 base_reg += 2;
18014 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18015
18016 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18017 plus_constant (Pmode,
18018 stack_pointer_rtx,
18019 i * 8)),
18020 reg);
18021 RTX_FRAME_RELATED_P (tmp) = 1;
18022 XVECEXP (dwarf, 0, i + 1) = tmp;
18023 }
18024
18025 par = emit_insn (par);
18026 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18027 RTX_FRAME_RELATED_P (par) = 1;
18028
18029 return count * 8;
18030 }
18031
18032 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18033 has the cmse_nonsecure_call attribute; return false otherwise. */
18034
18035 bool
18036 detect_cmse_nonsecure_call (tree addr)
18037 {
18038 if (!addr)
18039 return FALSE;
18040
18041 tree fntype = TREE_TYPE (addr);
18042 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18043 TYPE_ATTRIBUTES (fntype)))
18044 return TRUE;
18045 return FALSE;
18046 }
18047
18048
18049 /* Emit a call instruction with pattern PAT. ADDR is the address of
18050 the call target. */
18051
18052 void
18053 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18054 {
18055 rtx insn;
18056
18057 insn = emit_call_insn (pat);
18058
18059 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18060 If the call might use such an entry, add a use of the PIC register
18061 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18062 if (TARGET_VXWORKS_RTP
18063 && flag_pic
18064 && !sibcall
18065 && GET_CODE (addr) == SYMBOL_REF
18066 && (SYMBOL_REF_DECL (addr)
18067 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18068 : !SYMBOL_REF_LOCAL_P (addr)))
18069 {
18070 require_pic_register ();
18071 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18072 }
18073
18074 if (TARGET_AAPCS_BASED)
18075 {
18076 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18077 linker. We need to add an IP clobber to allow setting
18078 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18079 is not needed since it's a fixed register. */
18080 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18081 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18082 }
18083 }
18084
18085 /* Output a 'call' insn. */
18086 const char *
18087 output_call (rtx *operands)
18088 {
18089 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18090
18091 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
18092 if (REGNO (operands[0]) == LR_REGNUM)
18093 {
18094 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18095 output_asm_insn ("mov%?\t%0, %|lr", operands);
18096 }
18097
18098 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18099
18100 if (TARGET_INTERWORK || arm_arch4t)
18101 output_asm_insn ("bx%?\t%0", operands);
18102 else
18103 output_asm_insn ("mov%?\t%|pc, %0", operands);
18104
18105 return "";
18106 }
18107
18108 /* Output a move from ARM registers to ARM registers of a long double.
18109 OPERANDS[0] is the destination.
18110 OPERANDS[1] is the source. */
18111 const char *
18112 output_mov_long_double_arm_from_arm (rtx *operands)
18113 {
18114 /* We have to be careful here because the two might overlap. */
18115 int dest_start = REGNO (operands[0]);
18116 int src_start = REGNO (operands[1]);
18117 rtx ops[2];
18118 int i;
18119
18120 if (dest_start < src_start)
18121 {
18122 for (i = 0; i < 3; i++)
18123 {
18124 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18125 ops[1] = gen_rtx_REG (SImode, src_start + i);
18126 output_asm_insn ("mov%?\t%0, %1", ops);
18127 }
18128 }
18129 else
18130 {
18131 for (i = 2; i >= 0; i--)
18132 {
18133 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18134 ops[1] = gen_rtx_REG (SImode, src_start + i);
18135 output_asm_insn ("mov%?\t%0, %1", ops);
18136 }
18137 }
18138
18139 return "";
18140 }
18141
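/* Emit a pair of instructions to load the 32-bit value SRC into DEST: a set
   of the low 16 bits followed, when the high half is nonzero, by a
   ZERO_EXTRACT set of the upper 16 bits (a movw/movt style pair), or a
   HIGH/LO_SUM pair for non-constant sources.  */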
18142 void
18143 arm_emit_movpair (rtx dest, rtx src)
18144 {
18145 /* If the src is an immediate, simplify it. */
18146 if (CONST_INT_P (src))
18147 {
18148 HOST_WIDE_INT val = INTVAL (src);
18149 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18150 if ((val >> 16) & 0x0000ffff)
18151 {
18152 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18153 GEN_INT (16)),
18154 GEN_INT ((val >> 16) & 0x0000ffff));
18155 rtx_insn *insn = get_last_insn ();
18156 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18157 }
18158 return;
18159 }
18160 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18161 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18162 rtx_insn *insn = get_last_insn ();
18163 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18164 }
18165
18166 /* Output a move between double words. It must be REG<-MEM
18167 or MEM<-REG. */
18168 const char *
18169 output_move_double (rtx *operands, bool emit, int *count)
18170 {
18171 enum rtx_code code0 = GET_CODE (operands[0]);
18172 enum rtx_code code1 = GET_CODE (operands[1]);
18173 rtx otherops[3];
18174 if (count)
18175 *count = 1;
18176
18177 /* The only case when this might happen is when
18178 you are looking at the length of a DImode instruction
18179 that has an invalid constant in it. */
18180 if (code0 == REG && code1 != MEM)
18181 {
18182 gcc_assert (!emit);
18183 *count = 2;
18184 return "";
18185 }
18186
18187 if (code0 == REG)
18188 {
18189 unsigned int reg0 = REGNO (operands[0]);
18190
18191 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18192
18193 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18194
18195 switch (GET_CODE (XEXP (operands[1], 0)))
18196 {
18197 case REG:
18198
18199 if (emit)
18200 {
18201 if (TARGET_LDRD
18202 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18203 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18204 else
18205 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18206 }
18207 break;
18208
18209 case PRE_INC:
18210 gcc_assert (TARGET_LDRD);
18211 if (emit)
18212 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18213 break;
18214
18215 case PRE_DEC:
18216 if (emit)
18217 {
18218 if (TARGET_LDRD)
18219 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18220 else
18221 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18222 }
18223 break;
18224
18225 case POST_INC:
18226 if (emit)
18227 {
18228 if (TARGET_LDRD)
18229 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18230 else
18231 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18232 }
18233 break;
18234
18235 case POST_DEC:
18236 gcc_assert (TARGET_LDRD);
18237 if (emit)
18238 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18239 break;
18240
18241 case PRE_MODIFY:
18242 case POST_MODIFY:
18243 /* Autoincrement addressing modes should never have overlapping
18244 base and destination registers, and overlapping index registers
18245 are already prohibited, so this doesn't need to worry about
18246 fix_cm3_ldrd. */
18247 otherops[0] = operands[0];
18248 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18249 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18250
18251 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18252 {
18253 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18254 {
18255 /* Registers overlap so split out the increment. */
18256 if (emit)
18257 {
18258 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18259 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18260 }
18261 if (count)
18262 *count = 2;
18263 }
18264 else
18265 {
18266 /* Use a single insn if we can.
18267 FIXME: IWMMXT allows offsets larger than ldrd can
18268 handle, fix these up with a pair of ldr. */
18269 if (TARGET_THUMB2
18270 || !CONST_INT_P (otherops[2])
18271 || (INTVAL (otherops[2]) > -256
18272 && INTVAL (otherops[2]) < 256))
18273 {
18274 if (emit)
18275 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18276 }
18277 else
18278 {
18279 if (emit)
18280 {
18281 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18282 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18283 }
18284 if (count)
18285 *count = 2;
18286
18287 }
18288 }
18289 }
18290 else
18291 {
18292 /* Use a single insn if we can.
18293 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18294 fix these up with a pair of ldr. */
18295 if (TARGET_THUMB2
18296 || !CONST_INT_P (otherops[2])
18297 || (INTVAL (otherops[2]) > -256
18298 && INTVAL (otherops[2]) < 256))
18299 {
18300 if (emit)
18301 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18302 }
18303 else
18304 {
18305 if (emit)
18306 {
18307 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18308 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18309 }
18310 if (count)
18311 *count = 2;
18312 }
18313 }
18314 break;
18315
18316 case LABEL_REF:
18317 case CONST:
18318 /* We might be able to use ldrd %0, %1 here. However the range is
18319 different to ldr/adr, and it is broken on some ARMv7-M
18320 implementations. */
18321 /* Use the second register of the pair to avoid problematic
18322 overlap. */
18323 otherops[1] = operands[1];
18324 if (emit)
18325 output_asm_insn ("adr%?\t%0, %1", otherops);
18326 operands[1] = otherops[0];
18327 if (emit)
18328 {
18329 if (TARGET_LDRD)
18330 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18331 else
18332 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18333 }
18334
18335 if (count)
18336 *count = 2;
18337 break;
18338
18339 /* ??? This needs checking for thumb2. */
18340 default:
18341 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18342 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18343 {
18344 otherops[0] = operands[0];
18345 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18346 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18347
18348 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18349 {
18350 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18351 {
18352 switch ((int) INTVAL (otherops[2]))
18353 {
18354 case -8:
18355 if (emit)
18356 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18357 return "";
18358 case -4:
18359 if (TARGET_THUMB2)
18360 break;
18361 if (emit)
18362 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18363 return "";
18364 case 4:
18365 if (TARGET_THUMB2)
18366 break;
18367 if (emit)
18368 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18369 return "";
18370 }
18371 }
18372 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18373 operands[1] = otherops[0];
18374 if (TARGET_LDRD
18375 && (REG_P (otherops[2])
18376 || TARGET_THUMB2
18377 || (CONST_INT_P (otherops[2])
18378 && INTVAL (otherops[2]) > -256
18379 && INTVAL (otherops[2]) < 256)))
18380 {
18381 if (reg_overlap_mentioned_p (operands[0],
18382 otherops[2]))
18383 {
18384 /* Swap base and index registers over to
18385 avoid a conflict. */
18386 std::swap (otherops[1], otherops[2]);
18387 }
18388 /* If both registers conflict, it will usually
18389 have been fixed by a splitter. */
18390 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18391 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18392 {
18393 if (emit)
18394 {
18395 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18396 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18397 }
18398 if (count)
18399 *count = 2;
18400 }
18401 else
18402 {
18403 otherops[0] = operands[0];
18404 if (emit)
18405 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18406 }
18407 return "";
18408 }
18409
18410 if (CONST_INT_P (otherops[2]))
18411 {
18412 if (emit)
18413 {
18414 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18415 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18416 else
18417 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18418 }
18419 }
18420 else
18421 {
18422 if (emit)
18423 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18424 }
18425 }
18426 else
18427 {
18428 if (emit)
18429 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18430 }
18431
18432 if (count)
18433 *count = 2;
18434
18435 if (TARGET_LDRD)
18436 return "ldrd%?\t%0, [%1]";
18437
18438 return "ldmia%?\t%1, %M0";
18439 }
18440 else
18441 {
18442 otherops[1] = adjust_address (operands[1], SImode, 4);
18443 /* Take care of overlapping base/data reg. */
18444 if (reg_mentioned_p (operands[0], operands[1]))
18445 {
18446 if (emit)
18447 {
18448 output_asm_insn ("ldr%?\t%0, %1", otherops);
18449 output_asm_insn ("ldr%?\t%0, %1", operands);
18450 }
18451 if (count)
18452 *count = 2;
18453
18454 }
18455 else
18456 {
18457 if (emit)
18458 {
18459 output_asm_insn ("ldr%?\t%0, %1", operands);
18460 output_asm_insn ("ldr%?\t%0, %1", otherops);
18461 }
18462 if (count)
18463 *count = 2;
18464 }
18465 }
18466 }
18467 }
18468 else
18469 {
18470 /* Constraints should ensure this. */
18471 gcc_assert (code0 == MEM && code1 == REG);
18472 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18473 || (TARGET_ARM && TARGET_LDRD));
18474
18475 switch (GET_CODE (XEXP (operands[0], 0)))
18476 {
18477 case REG:
18478 if (emit)
18479 {
18480 if (TARGET_LDRD)
18481 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18482 else
18483 output_asm_insn ("stm%?\t%m0, %M1", operands);
18484 }
18485 break;
18486
18487 case PRE_INC:
18488 gcc_assert (TARGET_LDRD);
18489 if (emit)
18490 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18491 break;
18492
18493 case PRE_DEC:
18494 if (emit)
18495 {
18496 if (TARGET_LDRD)
18497 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18498 else
18499 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18500 }
18501 break;
18502
18503 case POST_INC:
18504 if (emit)
18505 {
18506 if (TARGET_LDRD)
18507 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18508 else
18509 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18510 }
18511 break;
18512
18513 case POST_DEC:
18514 gcc_assert (TARGET_LDRD);
18515 if (emit)
18516 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18517 break;
18518
18519 case PRE_MODIFY:
18520 case POST_MODIFY:
18521 otherops[0] = operands[1];
18522 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18523 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18524
18525 /* IWMMXT allows offsets larger than ldrd can handle, so
18526 fix these up with a pair of ldr. */
18527 if (!TARGET_THUMB2
18528 && CONST_INT_P (otherops[2])
18529 && (INTVAL(otherops[2]) <= -256
18530 || INTVAL(otherops[2]) >= 256))
18531 {
18532 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18533 {
18534 if (emit)
18535 {
18536 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18537 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18538 }
18539 if (count)
18540 *count = 2;
18541 }
18542 else
18543 {
18544 if (emit)
18545 {
18546 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18547 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18548 }
18549 if (count)
18550 *count = 2;
18551 }
18552 }
18553 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18554 {
18555 if (emit)
18556 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18557 }
18558 else
18559 {
18560 if (emit)
18561 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18562 }
18563 break;
18564
18565 case PLUS:
18566 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18567 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18568 {
18569 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18570 {
18571 case -8:
18572 if (emit)
18573 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18574 return "";
18575
18576 case -4:
18577 if (TARGET_THUMB2)
18578 break;
18579 if (emit)
18580 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18581 return "";
18582
18583 case 4:
18584 if (TARGET_THUMB2)
18585 break;
18586 if (emit)
18587 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18588 return "";
18589 }
18590 }
18591 if (TARGET_LDRD
18592 && (REG_P (otherops[2])
18593 || TARGET_THUMB2
18594 || (CONST_INT_P (otherops[2])
18595 && INTVAL (otherops[2]) > -256
18596 && INTVAL (otherops[2]) < 256)))
18597 {
18598 otherops[0] = operands[1];
18599 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18600 if (emit)
18601 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18602 return "";
18603 }
18604 /* Fall through */
18605
18606 default:
18607 otherops[0] = adjust_address (operands[0], SImode, 4);
18608 otherops[1] = operands[1];
18609 if (emit)
18610 {
18611 output_asm_insn ("str%?\t%1, %0", operands);
18612 output_asm_insn ("str%?\t%H1, %0", otherops);
18613 }
18614 if (count)
18615 *count = 2;
18616 }
18617 }
18618
18619 return "";
18620 }
18621
18622 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18623 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18624
18625 const char *
18626 output_move_quad (rtx *operands)
18627 {
18628 if (REG_P (operands[0]))
18629 {
18630 /* Load, or reg->reg move. */
18631
18632 if (MEM_P (operands[1]))
18633 {
18634 switch (GET_CODE (XEXP (operands[1], 0)))
18635 {
18636 case REG:
18637 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18638 break;
18639
18640 case LABEL_REF:
18641 case CONST:
18642 output_asm_insn ("adr%?\t%0, %1", operands);
18643 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18644 break;
18645
18646 default:
18647 gcc_unreachable ();
18648 }
18649 }
18650 else
18651 {
18652 rtx ops[2];
18653 int dest, src, i;
18654
18655 gcc_assert (REG_P (operands[1]));
18656
18657 dest = REGNO (operands[0]);
18658 src = REGNO (operands[1]);
18659
18660 /* This seems pretty dumb, but hopefully GCC won't try to do it
18661 very often. */
18662 if (dest < src)
18663 for (i = 0; i < 4; i++)
18664 {
18665 ops[0] = gen_rtx_REG (SImode, dest + i);
18666 ops[1] = gen_rtx_REG (SImode, src + i);
18667 output_asm_insn ("mov%?\t%0, %1", ops);
18668 }
18669 else
18670 for (i = 3; i >= 0; i--)
18671 {
18672 ops[0] = gen_rtx_REG (SImode, dest + i);
18673 ops[1] = gen_rtx_REG (SImode, src + i);
18674 output_asm_insn ("mov%?\t%0, %1", ops);
18675 }
18676 }
18677 }
18678 else
18679 {
18680 gcc_assert (MEM_P (operands[0]));
18681 gcc_assert (REG_P (operands[1]));
18682 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18683
18684 switch (GET_CODE (XEXP (operands[0], 0)))
18685 {
18686 case REG:
18687 output_asm_insn ("stm%?\t%m0, %M1", operands);
18688 break;
18689
18690 default:
18691 gcc_unreachable ();
18692 }
18693 }
18694
18695 return "";
18696 }
18697
18698 /* Output a VFP load or store instruction. */
18699
18700 const char *
18701 output_move_vfp (rtx *operands)
18702 {
18703 rtx reg, mem, addr, ops[2];
18704 int load = REG_P (operands[0]);
18705 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18706 int sp = (!TARGET_VFP_FP16INST
18707 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18708 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18709 const char *templ;
18710 char buff[50];
18711 machine_mode mode;
18712
18713 reg = operands[!load];
18714 mem = operands[load];
18715
18716 mode = GET_MODE (reg);
18717
18718 gcc_assert (REG_P (reg));
18719 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18720 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18721 || mode == SFmode
18722 || mode == DFmode
18723 || mode == HImode
18724 || mode == SImode
18725 || mode == DImode
18726 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18727 gcc_assert (MEM_P (mem));
18728
18729 addr = XEXP (mem, 0);
18730
18731 switch (GET_CODE (addr))
18732 {
18733 case PRE_DEC:
18734 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18735 ops[0] = XEXP (addr, 0);
18736 ops[1] = reg;
18737 break;
18738
18739 case POST_INC:
18740 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18741 ops[0] = XEXP (addr, 0);
18742 ops[1] = reg;
18743 break;
18744
18745 default:
18746 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18747 ops[0] = reg;
18748 ops[1] = mem;
18749 break;
18750 }
18751
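  /* E.g. a double-precision load from a plain address expands to
     "vldr%?.64 <Dd>, <mem>", while the post-increment form becomes
     "vldmia%?.64 <Rn>!, {<Dd>}".  */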
18752 sprintf (buff, templ,
18753 load ? "ld" : "st",
18754 dp ? "64" : sp ? "32" : "16",
18755 dp ? "P" : "",
18756 integer_p ? "\t%@ int" : "");
18757 output_asm_insn (buff, ops);
18758
18759 return "";
18760 }
18761
18762 /* Output a Neon double-word or quad-word load or store, or a load
18763 or store for larger structure modes.
18764
18765 WARNING: The ordering of elements is weird in big-endian mode,
18766 because the EABI requires that vectors stored in memory appear
18767 as though they were stored by a VSTM instruction.
18768 GCC RTL defines element ordering based on in-memory order.
18769 This can be different from the architectural ordering of elements
18770 within a NEON register. The intrinsics defined in arm_neon.h use the
18771 NEON register element ordering, not the GCC RTL element ordering.
18772
18773 For example, the in-memory ordering of a big-endian quadword
18774 vector with 16-bit elements when stored from register pair {d0,d1}
18775 will be (lowest address first, d0[N] is NEON register element N):
18776
18777 [d0[3], d0[2], d0[1], d0[0], d1[3], d1[2], d1[1], d1[0]]
18778
18779 When necessary, quadword registers (dN, dN+1) are moved to ARM
18780 registers from rN in the order:
18781
18782 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18783
18784 So that STM/LDM can be used on vectors in ARM registers, and the
18785 same memory layout will result as if VSTM/VLDM were used.
18786
18787 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18788 possible, which allows use of appropriate alignment tags.
18789 Note that the choice of "64" is independent of the actual vector
18790 element size; this size simply ensures that the behavior is
18791 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18792
18793 Due to limitations of those instructions, use of VST1.64/VLD1.64
18794 is not possible if:
18795 - the address contains PRE_DEC, or
18796 - the mode refers to more than 4 double-word registers
18797
18798 In those cases, it would be possible to replace VSTM/VLDM by a
18799 sequence of instructions; this is not currently implemented since
18800 this is not certain to actually improve performance. */
18801
18802 const char *
18803 output_move_neon (rtx *operands)
18804 {
18805 rtx reg, mem, addr, ops[2];
18806 int regno, nregs, load = REG_P (operands[0]);
18807 const char *templ;
18808 char buff[50];
18809 machine_mode mode;
18810
18811 reg = operands[!load];
18812 mem = operands[load];
18813
18814 mode = GET_MODE (reg);
18815
18816 gcc_assert (REG_P (reg));
18817 regno = REGNO (reg);
18818 nregs = REG_NREGS (reg) / 2;
18819 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18820 || NEON_REGNO_OK_FOR_QUAD (regno));
18821 gcc_assert (VALID_NEON_DREG_MODE (mode)
18822 || VALID_NEON_QREG_MODE (mode)
18823 || VALID_NEON_STRUCT_MODE (mode));
18824 gcc_assert (MEM_P (mem));
18825
18826 addr = XEXP (mem, 0);
18827
18828 /* Strip off const from addresses like (const (plus (...))). */
18829 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18830 addr = XEXP (addr, 0);
18831
18832 switch (GET_CODE (addr))
18833 {
18834 case POST_INC:
18835 /* We have to use vldm / vstm for too-large modes. */
18836 if (nregs > 4)
18837 {
18838 templ = "v%smia%%?\t%%0!, %%h1";
18839 ops[0] = XEXP (addr, 0);
18840 }
18841 else
18842 {
18843 templ = "v%s1.64\t%%h1, %%A0";
18844 ops[0] = mem;
18845 }
18846 ops[1] = reg;
18847 break;
18848
18849 case PRE_DEC:
18850 /* We have to use vldm / vstm in this case, since there is no
18851 pre-decrement form of the vld1 / vst1 instructions. */
18852 templ = "v%smdb%%?\t%%0!, %%h1";
18853 ops[0] = XEXP (addr, 0);
18854 ops[1] = reg;
18855 break;
18856
18857 case POST_MODIFY:
18858 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18859 gcc_unreachable ();
18860
18861 case REG:
18862 /* We have to use vldm / vstm for too-large modes. */
18863 if (nregs > 1)
18864 {
18865 if (nregs > 4)
18866 templ = "v%smia%%?\t%%m0, %%h1";
18867 else
18868 templ = "v%s1.64\t%%h1, %%A0";
18869
18870 ops[0] = mem;
18871 ops[1] = reg;
18872 break;
18873 }
18874 /* Fall through. */
18875 case LABEL_REF:
18876 case PLUS:
18877 {
18878 int i;
18879 int overlap = -1;
18880 for (i = 0; i < nregs; i++)
18881 {
18882 /* We're only using DImode here because it's a convenient size. */
18883 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18884 ops[1] = adjust_address (mem, DImode, 8 * i);
18885 if (reg_overlap_mentioned_p (ops[0], mem))
18886 {
18887 gcc_assert (overlap == -1);
18888 overlap = i;
18889 }
18890 else
18891 {
18892 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18893 output_asm_insn (buff, ops);
18894 }
18895 }
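      /* The transfer whose register overlaps the address is deferred until
	 last so that, on a load, the address register is not clobbered before
	 the remaining words have been moved.  */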
18896 if (overlap != -1)
18897 {
18898 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18899 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18900 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18901 output_asm_insn (buff, ops);
18902 }
18903
18904 return "";
18905 }
18906
18907 default:
18908 gcc_unreachable ();
18909 }
18910
18911 sprintf (buff, templ, load ? "ld" : "st");
18912 output_asm_insn (buff, ops);
18913
18914 return "";
18915 }
18916
18917 /* Compute and return the length of neon_mov<mode>, where <mode> is
18918 one of VSTRUCT modes: EI, OI, CI or XI. */
18919 int
18920 arm_attr_length_move_neon (rtx_insn *insn)
18921 {
18922 rtx reg, mem, addr;
18923 int load;
18924 machine_mode mode;
18925
18926 extract_insn_cached (insn);
18927
18928 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18929 {
18930 mode = GET_MODE (recog_data.operand[0]);
18931 switch (mode)
18932 {
18933 case E_EImode:
18934 case E_OImode:
18935 return 8;
18936 case E_CImode:
18937 return 12;
18938 case E_XImode:
18939 return 16;
18940 default:
18941 gcc_unreachable ();
18942 }
18943 }
18944
18945 load = REG_P (recog_data.operand[0]);
18946 reg = recog_data.operand[!load];
18947 mem = recog_data.operand[load];
18948
18949 gcc_assert (MEM_P (mem));
18950
18951 addr = XEXP (mem, 0);
18952
18953 /* Strip off const from addresses like (const (plus (...))). */
18954 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18955 addr = XEXP (addr, 0);
18956
18957 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18958 {
18959 int insns = REG_NREGS (reg) / 2;
18960 return insns * 4;
18961 }
18962 else
18963 return 4;
18964 }
18965
18966 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18967 return zero. */
18968
18969 int
18970 arm_address_offset_is_imm (rtx_insn *insn)
18971 {
18972 rtx mem, addr;
18973
18974 extract_insn_cached (insn);
18975
18976 if (REG_P (recog_data.operand[0]))
18977 return 0;
18978
18979 mem = recog_data.operand[0];
18980
18981 gcc_assert (MEM_P (mem));
18982
18983 addr = XEXP (mem, 0);
18984
18985 if (REG_P (addr)
18986 || (GET_CODE (addr) == PLUS
18987 && REG_P (XEXP (addr, 0))
18988 && CONST_INT_P (XEXP (addr, 1))))
18989 return 1;
18990 else
18991 return 0;
18992 }
18993
18994 /* Output an ADD r, s, #n where n may be too big for one instruction.
18995 If n is zero and the destination register is the same as the source, output nothing. */
18996 const char *
18997 output_add_immediate (rtx *operands)
18998 {
18999 HOST_WIDE_INT n = INTVAL (operands[2]);
19000
19001 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19002 {
19003 if (n < 0)
19004 output_multi_immediate (operands,
19005 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19006 -n);
19007 else
19008 output_multi_immediate (operands,
19009 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19010 n);
19011 }
19012
19013 return "";
19014 }
19015
19016 /* Output a multiple immediate operation.
19017 OPERANDS is the vector of operands referred to in the output patterns.
19018 INSTR1 is the output pattern to use for the first constant.
19019 INSTR2 is the output pattern to use for subsequent constants.
19020 IMMED_OP is the index of the constant slot in OPERANDS.
19021 N is the constant value. */
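/* For example, if N is 0xfff000 the loop below first emits INSTR1 with an
   immediate of 0xff000 and then INSTR2 with an immediate of 0xf00000; each
   chunk is an 8-bit value shifted by an even amount and therefore encodable
   as an ARM immediate.  */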
19022 static const char *
19023 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19024 int immed_op, HOST_WIDE_INT n)
19025 {
19026 #if HOST_BITS_PER_WIDE_INT > 32
19027 n &= 0xffffffff;
19028 #endif
19029
19030 if (n == 0)
19031 {
19032 /* Quick and easy output. */
19033 operands[immed_op] = const0_rtx;
19034 output_asm_insn (instr1, operands);
19035 }
19036 else
19037 {
19038 int i;
19039 const char * instr = instr1;
19040
19041 /* Note that n is never zero here (which would give no output). */
19042 for (i = 0; i < 32; i += 2)
19043 {
19044 if (n & (3 << i))
19045 {
19046 operands[immed_op] = GEN_INT (n & (255 << i));
19047 output_asm_insn (instr, operands);
19048 instr = instr2;
19049 i += 6;
19050 }
19051 }
19052 }
19053
19054 return "";
19055 }
19056
19057 /* Return the name of a shifter operation. */
19058 static const char *
19059 arm_shift_nmem(enum rtx_code code)
19060 {
19061 switch (code)
19062 {
19063 case ASHIFT:
19064 return ARM_LSL_NAME;
19065
19066 case ASHIFTRT:
19067 return "asr";
19068
19069 case LSHIFTRT:
19070 return "lsr";
19071
19072 case ROTATERT:
19073 return "ror";
19074
19075 default:
19076 abort();
19077 }
19078 }
19079
19080 /* Return the appropriate ARM instruction for the operation code.
19081 The returned result should not be overwritten. OP is the rtx of the
19082 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19083 was shifted. */
19084 const char *
19085 arithmetic_instr (rtx op, int shift_first_arg)
19086 {
19087 switch (GET_CODE (op))
19088 {
19089 case PLUS:
19090 return "add";
19091
19092 case MINUS:
19093 return shift_first_arg ? "rsb" : "sub";
19094
19095 case IOR:
19096 return "orr";
19097
19098 case XOR:
19099 return "eor";
19100
19101 case AND:
19102 return "and";
19103
19104 case ASHIFT:
19105 case ASHIFTRT:
19106 case LSHIFTRT:
19107 case ROTATERT:
19108 return arm_shift_nmem(GET_CODE(op));
19109
19110 default:
19111 gcc_unreachable ();
19112 }
19113 }
19114
19115 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19116 for the operation code. The returned result should not be overwritten.
19117 OP is the rtx code of the shift.
19118 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19119 constant shift amount otherwise. */
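/* For example, (ashift x 3) yields "lsl" with *AMOUNTP set to 3, and
   (mult x 8) is folded to the same "lsl" by 3.  */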
19120 static const char *
19121 shift_op (rtx op, HOST_WIDE_INT *amountp)
19122 {
19123 const char * mnem;
19124 enum rtx_code code = GET_CODE (op);
19125
19126 switch (code)
19127 {
19128 case ROTATE:
19129 if (!CONST_INT_P (XEXP (op, 1)))
19130 {
19131 output_operand_lossage ("invalid shift operand");
19132 return NULL;
19133 }
19134
19135 code = ROTATERT;
19136 *amountp = 32 - INTVAL (XEXP (op, 1));
19137 mnem = "ror";
19138 break;
19139
19140 case ASHIFT:
19141 case ASHIFTRT:
19142 case LSHIFTRT:
19143 case ROTATERT:
19144 mnem = arm_shift_nmem(code);
19145 if (CONST_INT_P (XEXP (op, 1)))
19146 {
19147 *amountp = INTVAL (XEXP (op, 1));
19148 }
19149 else if (REG_P (XEXP (op, 1)))
19150 {
19151 *amountp = -1;
19152 return mnem;
19153 }
19154 else
19155 {
19156 output_operand_lossage ("invalid shift operand");
19157 return NULL;
19158 }
19159 break;
19160
19161 case MULT:
19162 /* We never have to worry about the amount being other than a
19163 power of 2, since this case can never be reloaded from a reg. */
19164 if (!CONST_INT_P (XEXP (op, 1)))
19165 {
19166 output_operand_lossage ("invalid shift operand");
19167 return NULL;
19168 }
19169
19170 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19171
19172 /* Amount must be a power of two. */
19173 if (*amountp & (*amountp - 1))
19174 {
19175 output_operand_lossage ("invalid shift operand");
19176 return NULL;
19177 }
19178
19179 *amountp = exact_log2 (*amountp);
19180 gcc_assert (IN_RANGE (*amountp, 0, 31));
19181 return ARM_LSL_NAME;
19182
19183 default:
19184 output_operand_lossage ("invalid shift operand");
19185 return NULL;
19186 }
19187
19188 /* This is not 100% correct, but follows from the desire to merge
19189 multiplication by a power of 2 with the recognizer for a
19190 shift. >=32 is not a valid shift for "lsl", so we must try and
19191 output a shift that produces the correct arithmetical result.
19192 Using lsr #32 is identical except for the fact that the carry bit
19193 is not set correctly if we set the flags; but we never use the
19194 carry bit from such an operation, so we can ignore that. */
19195 if (code == ROTATERT)
19196 /* Rotate is just modulo 32. */
19197 *amountp &= 31;
19198 else if (*amountp != (*amountp & 31))
19199 {
19200 if (code == ASHIFT)
19201 mnem = "lsr";
19202 *amountp = 32;
19203 }
19204
19205 /* Shifts of 0 are no-ops. */
19206 if (*amountp == 0)
19207 return NULL;
19208
19209 return mnem;
19210 }
19211
19212 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19213 because /bin/as is horribly restrictive. The judgement about
19214 whether or not each character is 'printable' (and can be output as
19215 is) or not (and must be printed with an octal escape) must be made
19216 with reference to the *host* character set -- the situation is
19217 similar to that discussed in the comments above pp_c_char in
19218 c-pretty-print.c. */
19219
19220 #define MAX_ASCII_LEN 51
19221
19222 void
19223 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19224 {
19225 int i;
19226 int len_so_far = 0;
19227
19228 fputs ("\t.ascii\t\"", stream);
19229
19230 for (i = 0; i < len; i++)
19231 {
19232 int c = p[i];
19233
19234 if (len_so_far >= MAX_ASCII_LEN)
19235 {
19236 fputs ("\"\n\t.ascii\t\"", stream);
19237 len_so_far = 0;
19238 }
19239
19240 if (ISPRINT (c))
19241 {
19242 if (c == '\\' || c == '\"')
19243 {
19244 putc ('\\', stream);
19245 len_so_far++;
19246 }
19247 putc (c, stream);
19248 len_so_far++;
19249 }
19250 else
19251 {
19252 fprintf (stream, "\\%03o", c);
19253 len_so_far += 4;
19254 }
19255 }
19256
19257 fputs ("\"\n", stream);
19258 }
19259 \f
19260 /* Whether a register is callee saved or not. This is necessary because high
19261 registers are marked as caller saved when optimizing for size on Thumb-1
19262 targets, despite actually being callee saved, in order to avoid using them. */
19263 #define callee_saved_reg_p(reg) \
19264 (!call_used_regs[reg] \
19265 || (TARGET_THUMB1 && optimize_size \
19266 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19267
19268 /* Compute the register save mask for registers 0 through 12
19269 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19270
19271 static unsigned long
19272 arm_compute_save_reg0_reg12_mask (void)
19273 {
19274 unsigned long func_type = arm_current_func_type ();
19275 unsigned long save_reg_mask = 0;
19276 unsigned int reg;
19277
19278 if (IS_INTERRUPT (func_type))
19279 {
19280 unsigned int max_reg;
19281 /* Interrupt functions must not corrupt any registers,
19282 even call clobbered ones. If this is a leaf function
19283 we can just examine the registers used by the RTL, but
19284 otherwise we have to assume that whatever function is
19285 called might clobber anything, and so we have to save
19286 all the call-clobbered registers as well. */
19287 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19288 /* FIQ handlers have registers r8 - r12 banked, so
19289 we only need to check r0 - r7. Normal ISRs only
19290 bank r14 and r15, so we must check up to r12.
19291 r13 is the stack pointer, which is always preserved,
19292 so we do not need to consider it here. */
19293 max_reg = 7;
19294 else
19295 max_reg = 12;
19296
19297 for (reg = 0; reg <= max_reg; reg++)
19298 if (df_regs_ever_live_p (reg)
19299 || (! crtl->is_leaf && call_used_regs[reg]))
19300 save_reg_mask |= (1 << reg);
19301
19302 /* Also save the pic base register if necessary. */
19303 if (flag_pic
19304 && !TARGET_SINGLE_PIC_BASE
19305 && arm_pic_register != INVALID_REGNUM
19306 && crtl->uses_pic_offset_table)
19307 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19308 }
19309 else if (IS_VOLATILE(func_type))
19310 {
19311 /* For noreturn functions we historically omitted register saves
19312 altogether. However this really messes up debugging. As a
19313 compromise, save just the frame pointers. Combined with the link
19314 register saved elsewhere this should be sufficient to get
19315 a backtrace. */
19316 if (frame_pointer_needed)
19317 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19318 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19319 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19320 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19321 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19322 }
19323 else
19324 {
19325 /* In the normal case we only need to save those registers
19326 which are call saved and which are used by this function. */
19327 for (reg = 0; reg <= 11; reg++)
19328 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19329 save_reg_mask |= (1 << reg);
19330
19331 /* Handle the frame pointer as a special case. */
19332 if (frame_pointer_needed)
19333 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19334
19335 /* If we aren't loading the PIC register,
19336 don't stack it even though it may be live. */
19337 if (flag_pic
19338 && !TARGET_SINGLE_PIC_BASE
19339 && arm_pic_register != INVALID_REGNUM
19340 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19341 || crtl->uses_pic_offset_table))
19342 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19343
19344 /* The prologue will copy SP into R0, so save it. */
19345 if (IS_STACKALIGN (func_type))
19346 save_reg_mask |= 1;
19347 }
19348
19349 /* Save registers so the exception handler can modify them. */
19350 if (crtl->calls_eh_return)
19351 {
19352 unsigned int i;
19353
19354 for (i = 0; ; i++)
19355 {
19356 reg = EH_RETURN_DATA_REGNO (i);
19357 if (reg == INVALID_REGNUM)
19358 break;
19359 save_reg_mask |= 1 << reg;
19360 }
19361 }
19362
19363 return save_reg_mask;
19364 }
19365
19366 /* Return true if r3 is live at the start of the function. */
19367
19368 static bool
19369 arm_r3_live_at_start_p (void)
19370 {
19371 /* Just look at cfg info, which is still close enough to correct at this
19372 point. This gives false positives for broken functions that might use
19373 uninitialized data that happens to be allocated in r3, but who cares? */
19374 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19375 }
19376
19377 /* Compute the number of bytes used to store the static chain register on the
19378 stack, above the stack frame. We need to know this accurately to get the
19379 alignment of the rest of the stack frame correct. */
19380
19381 static int
19382 arm_compute_static_chain_stack_bytes (void)
19383 {
19384 /* Once the value is updated from the init value of -1, do not
19385 re-compute. */
19386 if (cfun->machine->static_chain_stack_bytes != -1)
19387 return cfun->machine->static_chain_stack_bytes;
19388
19389 /* See the defining assertion in arm_expand_prologue. */
19390 if (IS_NESTED (arm_current_func_type ())
19391 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19392 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19393 || flag_stack_clash_protection)
19394 && !df_regs_ever_live_p (LR_REGNUM)))
19395 && arm_r3_live_at_start_p ()
19396 && crtl->args.pretend_args_size == 0)
19397 return 4;
19398
19399 return 0;
19400 }
19401
19402 /* Compute a bit mask of which core registers need to be
19403 saved on the stack for the current function.
19404 This is used by arm_compute_frame_layout, which may add extra registers. */
19405
19406 static unsigned long
19407 arm_compute_save_core_reg_mask (void)
19408 {
19409 unsigned int save_reg_mask = 0;
19410 unsigned long func_type = arm_current_func_type ();
19411 unsigned int reg;
19412
19413 if (IS_NAKED (func_type))
19414 /* This should never really happen. */
19415 return 0;
19416
19417 /* If we are creating a stack frame, then we must save the frame pointer,
19418 IP (which will hold the old stack pointer), LR and the PC. */
19419 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19420 save_reg_mask |=
19421 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19422 | (1 << IP_REGNUM)
19423 | (1 << LR_REGNUM)
19424 | (1 << PC_REGNUM);
19425
19426 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19427
19428 /* Decide if we need to save the link register.
19429 Interrupt routines have their own banked link register,
19430 so they never need to save it.
19431 Otherwise if we do not use the link register we do not need to save
19432 it. If we are pushing other registers onto the stack however, we
19433 can save an instruction in the epilogue by pushing the link register
19434 now and then popping it back into the PC. This incurs extra memory
19435 accesses though, so we only do it when optimizing for size, and only
19436 if we know that we will not need a fancy return sequence. */
19437 if (df_regs_ever_live_p (LR_REGNUM)
19438 || (save_reg_mask
19439 && optimize_size
19440 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19441 && !crtl->tail_call_emit
19442 && !crtl->calls_eh_return))
19443 save_reg_mask |= 1 << LR_REGNUM;
19444
19445 if (cfun->machine->lr_save_eliminated)
19446 save_reg_mask &= ~ (1 << LR_REGNUM);
19447
19448 if (TARGET_REALLY_IWMMXT
19449 && ((bit_count (save_reg_mask)
19450 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19451 arm_compute_static_chain_stack_bytes())
19452 ) % 2) != 0)
19453 {
19454 /* The total number of registers that are going to be pushed
19455 onto the stack is odd. We need to ensure that the stack
19456 is 64-bit aligned before we start to save iWMMXt registers,
19457 and also before we start to create locals. (A local variable
19458 might be a double or long long which we will load/store using
19459 an iWMMXt instruction). Therefore we need to push another
19460 ARM register, so that the stack will be 64-bit aligned. We
19461 try to avoid using the arg registers (r0 - r3) as they might be
19462 used to pass values in a tail call. */
19463 for (reg = 4; reg <= 12; reg++)
19464 if ((save_reg_mask & (1 << reg)) == 0)
19465 break;
19466
19467 if (reg <= 12)
19468 save_reg_mask |= (1 << reg);
19469 else
19470 {
19471 cfun->machine->sibcall_blocked = 1;
19472 save_reg_mask |= (1 << 3);
19473 }
19474 }
19475
19476 /* We may need to push an additional register for use initializing the
19477 PIC base register. */
19478 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19479 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19480 {
19481 reg = thumb_find_work_register (1 << 4);
19482 if (!call_used_regs[reg])
19483 save_reg_mask |= (1 << reg);
19484 }
19485
19486 return save_reg_mask;
19487 }
19488
19489 /* Compute a bit mask of which core registers need to be
19490 saved on the stack for the current function. */
19491 static unsigned long
19492 thumb1_compute_save_core_reg_mask (void)
19493 {
19494 unsigned long mask;
19495 unsigned reg;
19496
19497 mask = 0;
19498 for (reg = 0; reg < 12; reg ++)
19499 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19500 mask |= 1 << reg;
19501
19502 /* Handle the frame pointer as a special case. */
19503 if (frame_pointer_needed)
19504 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19505
19506 if (flag_pic
19507 && !TARGET_SINGLE_PIC_BASE
19508 && arm_pic_register != INVALID_REGNUM
19509 && crtl->uses_pic_offset_table)
19510 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19511
19512 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19513 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19514 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19515
19516 /* LR will also be pushed if any lo regs are pushed. */
19517 if (mask & 0xff || thumb_force_lr_save ())
19518 mask |= (1 << LR_REGNUM);
19519
19520 /* Make sure we have a low work register if we need one.
19521 We will need one if we are going to push a high register,
19522 but we are not currently intending to push a low register. */
19523 if ((mask & 0xff) == 0
19524 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19525 {
19526 /* Use thumb_find_work_register to choose which register
19527 we will use. If the register is live then we will
19528 have to push it. Use LAST_LO_REGNUM as our fallback
19529 choice for the register to select. */
19530 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19531 /* Make sure the register returned by thumb_find_work_register is
19532 not part of the return value. */
19533 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19534 reg = LAST_LO_REGNUM;
19535
19536 if (callee_saved_reg_p (reg))
19537 mask |= 1 << reg;
19538 }
19539
19540 /* The 504 below is 8 bytes less than 512 because there are two possible
19541 alignment words. We can't tell here if they will be present or not, so we
19542 have to play it safe and assume that they are. */
19543 if ((CALLER_INTERWORKING_SLOT_SIZE +
19544 ROUND_UP_WORD (get_frame_size ()) +
19545 crtl->outgoing_args_size) >= 504)
19546 {
19547 /* This is the same as the code in thumb1_expand_prologue() which
19548 determines which register to use for stack decrement. */
19549 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19550 if (mask & (1 << reg))
19551 break;
19552
19553 if (reg > LAST_LO_REGNUM)
19554 {
19555 /* Make sure we have a register available for stack decrement. */
19556 mask |= 1 << LAST_LO_REGNUM;
19557 }
19558 }
19559
19560 return mask;
19561 }
19562
19563
19564 /* Return the number of bytes required to save VFP registers. */
19565 static int
19566 arm_get_vfp_saved_size (void)
19567 {
19568 unsigned int regno;
19569 int count;
19570 int saved;
19571
19572 saved = 0;
19573 /* Space for saved VFP registers. */
19574 if (TARGET_HARD_FLOAT)
19575 {
19576 count = 0;
19577 for (regno = FIRST_VFP_REGNUM;
19578 regno < LAST_VFP_REGNUM;
19579 regno += 2)
19580 {
19581 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19582 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19583 {
19584 if (count > 0)
19585 {
19586 /* Work around the ARM10 VFPr1 bug. */
19587 if (count == 2 && !arm_arch6)
19588 count++;
19589 saved += count * 8;
19590 }
19591 count = 0;
19592 }
19593 else
19594 count++;
19595 }
19596 if (count > 0)
19597 {
19598 if (count == 2 && !arm_arch6)
19599 count++;
19600 saved += count * 8;
19601 }
19602 }
19603 return saved;
19604 }
19605
19606
19607 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19608 everything bar the final return instruction. If simple_return is true,
19609 then do not output the epilogue, because it has already been emitted in RTL.
19610
19611 Note: do not forget to update length attribute of corresponding insn pattern
19612 when changing assembly output (e.g. the length attribute of
19613 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19614 register clearing sequences). */
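/* For example, a leaf function with no saved registers returns with a single
   "bx lr" (or "mov pc, lr" where BX is unavailable), while a function that
   pushed {r4, lr} typically returns with "pop {r4, pc}".  */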
19615 const char *
19616 output_return_instruction (rtx operand, bool really_return, bool reverse,
19617 bool simple_return)
19618 {
19619 char conditional[10];
19620 char instr[100];
19621 unsigned reg;
19622 unsigned long live_regs_mask;
19623 unsigned long func_type;
19624 arm_stack_offsets *offsets;
19625
19626 func_type = arm_current_func_type ();
19627
19628 if (IS_NAKED (func_type))
19629 return "";
19630
19631 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19632 {
19633 /* If this function was declared non-returning, and we have
19634 found a tail call, then we have to trust that the called
19635 function won't return. */
19636 if (really_return)
19637 {
19638 rtx ops[2];
19639
19640 /* Otherwise, trap an attempted return by aborting. */
19641 ops[0] = operand;
19642 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19643 : "abort");
19644 assemble_external_libcall (ops[1]);
19645 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19646 }
19647
19648 return "";
19649 }
19650
19651 gcc_assert (!cfun->calls_alloca || really_return);
19652
19653 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19654
19655 cfun->machine->return_used_this_function = 1;
19656
19657 offsets = arm_get_frame_offsets ();
19658 live_regs_mask = offsets->saved_regs_mask;
19659
19660 if (!simple_return && live_regs_mask)
19661 {
19662 const char * return_reg;
19663
19664 /* If we do not have any special requirements for function exit
19665 (e.g. interworking) then we can load the return address
19666 directly into the PC. Otherwise we must load it into LR. */
19667 if (really_return
19668 && !IS_CMSE_ENTRY (func_type)
19669 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19670 return_reg = reg_names[PC_REGNUM];
19671 else
19672 return_reg = reg_names[LR_REGNUM];
19673
19674 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19675 {
19676 /* There are three possible reasons for the IP register
19677 being saved: 1) a stack frame was created, in which case
19678 IP contains the old stack pointer, or 2) an ISR routine
19679 corrupted it, or 3) it was saved to align the stack on
19680 iWMMXt. In case 1, restore IP into SP; otherwise just
19681 restore IP. */
19682 if (frame_pointer_needed)
19683 {
19684 live_regs_mask &= ~ (1 << IP_REGNUM);
19685 live_regs_mask |= (1 << SP_REGNUM);
19686 }
19687 else
19688 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19689 }
19690
19691 /* On some ARM architectures it is faster to use LDR rather than
19692 LDM to load a single register. On other architectures, the
19693 cost is the same. In 26 bit mode, or for exception handlers,
19694 we have to use LDM to load the PC so that the CPSR is also
19695 restored. */
19696 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19697 if (live_regs_mask == (1U << reg))
19698 break;
19699
19700 if (reg <= LAST_ARM_REGNUM
19701 && (reg != LR_REGNUM
19702 || ! really_return
19703 || ! IS_INTERRUPT (func_type)))
19704 {
19705 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19706 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19707 }
19708 else
19709 {
19710 char *p;
19711 int first = 1;
19712
19713 /* Generate the load multiple instruction to restore the
19714 registers. Note we can get here, even if
19715 frame_pointer_needed is true, but only if sp already
19716 points to the base of the saved core registers. */
19717 if (live_regs_mask & (1 << SP_REGNUM))
19718 {
19719 unsigned HOST_WIDE_INT stack_adjust;
19720
19721 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19722 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19723
19724 if (stack_adjust && arm_arch5t && TARGET_ARM)
19725 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19726 else
19727 {
19728 /* If we can't use ldmib (SA110 bug),
19729 then try to pop r3 instead. */
19730 if (stack_adjust)
19731 live_regs_mask |= 1 << 3;
19732
19733 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19734 }
19735 }
19736 /* For interrupt returns we have to use an LDM rather than
19737 a POP so that we can use the exception return variant. */
19738 else if (IS_INTERRUPT (func_type))
19739 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19740 else
19741 sprintf (instr, "pop%s\t{", conditional);
19742
19743 p = instr + strlen (instr);
19744
19745 for (reg = 0; reg <= SP_REGNUM; reg++)
19746 if (live_regs_mask & (1 << reg))
19747 {
19748 int l = strlen (reg_names[reg]);
19749
19750 if (first)
19751 first = 0;
19752 else
19753 {
19754 memcpy (p, ", ", 2);
19755 p += 2;
19756 }
19757
19758 memcpy (p, "%|", 2);
19759 memcpy (p + 2, reg_names[reg], l);
19760 p += l + 2;
19761 }
19762
19763 if (live_regs_mask & (1 << LR_REGNUM))
19764 {
19765 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19766 /* If returning from an interrupt, restore the CPSR. */
19767 if (IS_INTERRUPT (func_type))
19768 strcat (p, "^");
19769 }
19770 else
19771 strcpy (p, "}");
19772 }
19773
19774 output_asm_insn (instr, & operand);
19775
19776 /* See if we need to generate an extra instruction to
19777 perform the actual function return. */
19778 if (really_return
19779 && func_type != ARM_FT_INTERWORKED
19780 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19781 {
19782 /* The return has already been handled
19783 by loading the LR into the PC. */
19784 return "";
19785 }
19786 }
19787
19788 if (really_return)
19789 {
19790 switch ((int) ARM_FUNC_TYPE (func_type))
19791 {
19792 case ARM_FT_ISR:
19793 case ARM_FT_FIQ:
19794 /* ??? This is wrong for unified assembly syntax. */
19795 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19796 break;
19797
19798 case ARM_FT_INTERWORKED:
19799 gcc_assert (arm_arch5t || arm_arch4t);
19800 sprintf (instr, "bx%s\t%%|lr", conditional);
19801 break;
19802
19803 case ARM_FT_EXCEPTION:
19804 /* ??? This is wrong for unified assembly syntax. */
19805 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19806 break;
19807
19808 default:
19809 if (IS_CMSE_ENTRY (func_type))
19810 {
19811 /* Check if we have to clear the 'GE bits', which are only used if
19812 parallel add and subtraction instructions are available. */
19813 if (TARGET_INT_SIMD)
19814 snprintf (instr, sizeof (instr),
19815 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19816 else
19817 snprintf (instr, sizeof (instr),
19818 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19819
19820 output_asm_insn (instr, & operand);
19821 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19822 {
19823 /* Clear the cumulative exception-status bits (0-4,7) and the
19824 condition code bits (28-31) of the FPSCR. We need to
19825 remember to clear the first scratch register used (IP) and
19826 save and restore the second (r4). */
19827 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19828 output_asm_insn (instr, & operand);
19829 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19830 output_asm_insn (instr, & operand);
19831 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19832 output_asm_insn (instr, & operand);
19833 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19834 output_asm_insn (instr, & operand);
19835 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19836 output_asm_insn (instr, & operand);
19837 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19838 output_asm_insn (instr, & operand);
19839 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19840 output_asm_insn (instr, & operand);
19841 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19842 output_asm_insn (instr, & operand);
19843 }
19844 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19845 }
19846 /* Use bx if it's available. */
19847 else if (arm_arch5t || arm_arch4t)
19848 sprintf (instr, "bx%s\t%%|lr", conditional);
19849 else
19850 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19851 break;
19852 }
19853
19854 output_asm_insn (instr, & operand);
19855 }
19856
19857 return "";
19858 }
19859
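/* As an illustration only (the exact sequence depends on the saved-register
   mask, the function type and the target architecture), a plain ARM-state
   function that saved {r4, r5, lr} would typically return with a single
   instruction such as

	pop	{r4, r5, pc}

   whereas an interworking or CMSE entry return reloads LR instead and
   finishes with "bx lr" or "bxns lr" respectively.  */
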
19860 /* Output in FILE asm statements needed to declare the NAME of the function
19861 defined by its DECL node. */
19862
19863 void
19864 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19865 {
19866 size_t cmse_name_len;
19867 char *cmse_name = 0;
19868 char cmse_prefix[] = "__acle_se_";
19869
19870 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19871 extra function label for each function with the 'cmse_nonsecure_entry'
19872 attribute. This extra function label should be prepended with
19873 '__acle_se_', telling the linker that it needs to create secure gateway
19874 veneers for this function. */
19875 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19876 DECL_ATTRIBUTES (decl)))
19877 {
19878 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19879 cmse_name = XALLOCAVEC (char, cmse_name_len);
19880 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19881 targetm.asm_out.globalize_label (file, cmse_name);
19882
19883 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19884 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19885 }
19886
19887 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19888 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19889 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19890 ASM_OUTPUT_LABEL (file, name);
19891
19892 if (cmse_name)
19893 ASM_OUTPUT_LABEL (file, cmse_name);
19894
19895 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19896 }
19897
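/* For a hypothetical entry function "foo" compiled with -mcmse and marked
   with the cmse_nonsecure_entry attribute, the routine above emits roughly:

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   so that the linker can create a secure gateway veneer for it.  */
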
19898 /* Write the function name into the code section, directly preceding
19899 the function prologue.
19900
19901 Code will be output similar to this:
19902 t0
19903 .ascii "arm_poke_function_name", 0
19904 .align
19905 t1
19906 .word 0xff000000 + (t1 - t0)
19907 arm_poke_function_name
19908 mov ip, sp
19909 stmfd sp!, {fp, ip, lr, pc}
19910 sub fp, ip, #4
19911
19912 When performing a stack backtrace, code can inspect the value
19913 of 'pc' stored at 'fp' + 0. If the trace function then looks
19914 at location pc - 12 and the top 8 bits are set, then we know
19915 that there is a function name embedded immediately preceding this
19916 location, whose padded length is ((pc[-3]) & 0x00ffffff).
19917
19918 We assume that pc is declared as a pointer to an unsigned long.
19919
19920 It is of no benefit to output the function name if we are assembling
19921 a leaf function. These function types will not contain a stack
19922 backtrace structure, therefore it is not possible to determine the
19923 function name. */
19924 void
19925 arm_poke_function_name (FILE *stream, const char *name)
19926 {
19927 unsigned long alignlength;
19928 unsigned long length;
19929 rtx x;
19930
19931 length = strlen (name) + 1;
19932 alignlength = ROUND_UP_WORD (length);
19933
19934 ASM_OUTPUT_ASCII (stream, name, length);
19935 ASM_OUTPUT_ALIGN (stream, 2);
19936 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19937 assemble_aligned_integer (UNITS_PER_WORD, x);
19938 }
19939
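/* A backtrace routine could recover the embedded name along these lines
   (an illustrative sketch, not part of GCC), where "pc" is the saved
   program counter fetched from the frame, declared as a pointer to an
   unsigned long as described above:

	unsigned long marker = pc[-3];
	if ((marker & 0xff000000) == 0xff000000)
	  {
	    unsigned long padded_len = marker & 0x00ffffff;
	    const char *name = (const char *) &pc[-3] - padded_len;
	  }

   padded_len being the word-aligned length stored by the routine above.  */
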
19940 /* Place some comments into the assembler stream
19941 describing the current function. */
19942 static void
19943 arm_output_function_prologue (FILE *f)
19944 {
19945 unsigned long func_type;
19946
19947 /* Sanity check. */
19948 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19949
19950 func_type = arm_current_func_type ();
19951
19952 switch ((int) ARM_FUNC_TYPE (func_type))
19953 {
19954 default:
19955 case ARM_FT_NORMAL:
19956 break;
19957 case ARM_FT_INTERWORKED:
19958 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19959 break;
19960 case ARM_FT_ISR:
19961 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19962 break;
19963 case ARM_FT_FIQ:
19964 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19965 break;
19966 case ARM_FT_EXCEPTION:
19967 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19968 break;
19969 }
19970
19971 if (IS_NAKED (func_type))
19972 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19973
19974 if (IS_VOLATILE (func_type))
19975 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19976
19977 if (IS_NESTED (func_type))
19978 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19979 if (IS_STACKALIGN (func_type))
19980 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19981 if (IS_CMSE_ENTRY (func_type))
19982 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19983
19984 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
19985 (HOST_WIDE_INT) crtl->args.size,
19986 crtl->args.pretend_args_size,
19987 (HOST_WIDE_INT) get_frame_size ());
19988
19989 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19990 frame_pointer_needed,
19991 cfun->machine->uses_anonymous_args);
19992
19993 if (cfun->machine->lr_save_eliminated)
19994 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19995
19996 if (crtl->calls_eh_return)
19997 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19998
19999 }
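
/* The annotations above show up in the assembly output as comments of the
   form (values are illustrative only):

	@ Function supports interworking.
	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0  */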
20000
20001 static void
20002 arm_output_function_epilogue (FILE *)
20003 {
20004 arm_stack_offsets *offsets;
20005
20006 if (TARGET_THUMB1)
20007 {
20008 int regno;
20009
20010 /* Emit any call-via-reg trampolines that are needed for v4t support
20011 of call_reg and call_value_reg type insns. */
20012 for (regno = 0; regno < LR_REGNUM; regno++)
20013 {
20014 rtx label = cfun->machine->call_via[regno];
20015
20016 if (label != NULL)
20017 {
20018 switch_to_section (function_section (current_function_decl));
20019 targetm.asm_out.internal_label (asm_out_file, "L",
20020 CODE_LABEL_NUMBER (label));
20021 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20022 }
20023 }
20024
20025 /* ??? Probably not safe to set this here, since it assumes that a
20026 function will be emitted as assembly immediately after we generate
20027 RTL for it. This does not happen for inline functions. */
20028 cfun->machine->return_used_this_function = 0;
20029 }
20030 else /* TARGET_32BIT */
20031 {
20032 /* We need to take into account any stack-frame rounding. */
20033 offsets = arm_get_frame_offsets ();
20034
20035 gcc_assert (!use_return_insn (FALSE, NULL)
20036 || (cfun->machine->return_used_this_function != 0)
20037 || offsets->saved_regs == offsets->outgoing_args
20038 || frame_pointer_needed);
20039 }
20040 }
20041
20042 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20043 STR and STRD. If an even number of registers is being pushed, one
20044 STRD pattern is created for each register pair. If an odd number of
20045 registers is pushed, emit an initial STR followed by
20046 as many STRD instructions as are needed. This works best when the
20047 stack is initially 64-bit aligned (the normal case), since it
20048 ensures that each STRD is also 64-bit aligned. */
20049 static void
20050 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20051 {
20052 int num_regs = 0;
20053 int i;
20054 int regno;
20055 rtx par = NULL_RTX;
20056 rtx dwarf = NULL_RTX;
20057 rtx tmp;
20058 bool first = true;
20059
20060 num_regs = bit_count (saved_regs_mask);
20061
20062 /* Must be at least one register to save, and can't save SP or PC. */
20063 gcc_assert (num_regs > 0 && num_regs <= 14);
20064 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20065 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20066
20067 /* Create sequence for DWARF info. All the frame-related data for
20068 debugging is held in this wrapper. */
20069 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20070
20071 /* Describe the stack adjustment. */
20072 tmp = gen_rtx_SET (stack_pointer_rtx,
20073 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20074 RTX_FRAME_RELATED_P (tmp) = 1;
20075 XVECEXP (dwarf, 0, 0) = tmp;
20076
20077 /* Find the first register. */
20078 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20079 ;
20080
20081 i = 0;
20082
20083 /* If there's an odd number of registers to push, start off by
20084 pushing a single register. This ensures that subsequent strd
20085 operations are dword aligned (assuming that SP was originally
20086 64-bit aligned). */
20087 if ((num_regs & 1) != 0)
20088 {
20089 rtx reg, mem, insn;
20090
20091 reg = gen_rtx_REG (SImode, regno);
20092 if (num_regs == 1)
20093 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20094 stack_pointer_rtx));
20095 else
20096 mem = gen_frame_mem (Pmode,
20097 gen_rtx_PRE_MODIFY
20098 (Pmode, stack_pointer_rtx,
20099 plus_constant (Pmode, stack_pointer_rtx,
20100 -4 * num_regs)));
20101
20102 tmp = gen_rtx_SET (mem, reg);
20103 RTX_FRAME_RELATED_P (tmp) = 1;
20104 insn = emit_insn (tmp);
20105 RTX_FRAME_RELATED_P (insn) = 1;
20106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20107 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20108 RTX_FRAME_RELATED_P (tmp) = 1;
20109 i++;
20110 regno++;
20111 XVECEXP (dwarf, 0, i) = tmp;
20112 first = false;
20113 }
20114
20115 while (i < num_regs)
20116 if (saved_regs_mask & (1 << regno))
20117 {
20118 rtx reg1, reg2, mem1, mem2;
20119 rtx tmp0, tmp1, tmp2;
20120 int regno2;
20121
20122 /* Find the register to pair with this one. */
20123 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20124 regno2++)
20125 ;
20126
20127 reg1 = gen_rtx_REG (SImode, regno);
20128 reg2 = gen_rtx_REG (SImode, regno2);
20129
20130 if (first)
20131 {
20132 rtx insn;
20133
20134 first = false;
20135 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20136 stack_pointer_rtx,
20137 -4 * num_regs));
20138 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20139 stack_pointer_rtx,
20140 -4 * (num_regs - 1)));
20141 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20142 plus_constant (Pmode, stack_pointer_rtx,
20143 -4 * (num_regs)));
20144 tmp1 = gen_rtx_SET (mem1, reg1);
20145 tmp2 = gen_rtx_SET (mem2, reg2);
20146 RTX_FRAME_RELATED_P (tmp0) = 1;
20147 RTX_FRAME_RELATED_P (tmp1) = 1;
20148 RTX_FRAME_RELATED_P (tmp2) = 1;
20149 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20150 XVECEXP (par, 0, 0) = tmp0;
20151 XVECEXP (par, 0, 1) = tmp1;
20152 XVECEXP (par, 0, 2) = tmp2;
20153 insn = emit_insn (par);
20154 RTX_FRAME_RELATED_P (insn) = 1;
20155 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20156 }
20157 else
20158 {
20159 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20160 stack_pointer_rtx,
20161 4 * i));
20162 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20163 stack_pointer_rtx,
20164 4 * (i + 1)));
20165 tmp1 = gen_rtx_SET (mem1, reg1);
20166 tmp2 = gen_rtx_SET (mem2, reg2);
20167 RTX_FRAME_RELATED_P (tmp1) = 1;
20168 RTX_FRAME_RELATED_P (tmp2) = 1;
20169 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20170 XVECEXP (par, 0, 0) = tmp1;
20171 XVECEXP (par, 0, 1) = tmp2;
20172 emit_insn (par);
20173 }
20174
20175 /* Create unwind information. This is an approximation. */
20176 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20177 plus_constant (Pmode,
20178 stack_pointer_rtx,
20179 4 * i)),
20180 reg1);
20181 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20182 plus_constant (Pmode,
20183 stack_pointer_rtx,
20184 4 * (i + 1))),
20185 reg2);
20186
20187 RTX_FRAME_RELATED_P (tmp1) = 1;
20188 RTX_FRAME_RELATED_P (tmp2) = 1;
20189 XVECEXP (dwarf, 0, i + 1) = tmp1;
20190 XVECEXP (dwarf, 0, i + 2) = tmp2;
20191 i += 2;
20192 regno = regno2 + 1;
20193 }
20194 else
20195 regno++;
20196
20197 return;
20198 }
20199
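/* For example (registers chosen arbitrarily), pushing {r4, r5, r6} with the
   routine above yields roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. one STR with writeback that allocates the whole area, followed by a
   64-bit-aligned STRD.  */
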
20200 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20201 whenever possible, otherwise it emits single-word stores. The first store
20202 also allocates stack space for all saved registers, using writeback with
20203 pre-indexed addressing. All other stores use offset addressing. If no STRD
20204 can be emitted, this function emits a sequence of single-word stores,
20205 and not an STM as before, because single-word stores provide more
20206 scheduling freedom and can be turned into an STM by peephole optimizations. */
20207 static void
20208 arm_emit_strd_push (unsigned long saved_regs_mask)
20209 {
20210 int num_regs = 0;
20211 int i, j, dwarf_index = 0;
20212 int offset = 0;
20213 rtx dwarf = NULL_RTX;
20214 rtx insn = NULL_RTX;
20215 rtx tmp, mem;
20216
20217 /* TODO: More efficient code can be emitted by changing the
20218 layout, e.g., first push all pairs that can use STRD to keep the
20219 stack aligned, and then push all other registers. */
20220 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20221 if (saved_regs_mask & (1 << i))
20222 num_regs++;
20223
20224 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20225 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20226 gcc_assert (num_regs > 0);
20227
20228 /* Create sequence for DWARF info. */
20229 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20230
20231 /* For dwarf info, we generate an explicit stack update. */
20232 tmp = gen_rtx_SET (stack_pointer_rtx,
20233 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20234 RTX_FRAME_RELATED_P (tmp) = 1;
20235 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20236
20237 /* Save registers. */
20238 offset = - 4 * num_regs;
20239 j = 0;
20240 while (j <= LAST_ARM_REGNUM)
20241 if (saved_regs_mask & (1 << j))
20242 {
20243 if ((j % 2 == 0)
20244 && (saved_regs_mask & (1 << (j + 1))))
20245 {
20246 /* The current register and the next register form a register pair
20247 for which STRD can be generated. */
20248 if (offset < 0)
20249 {
20250 /* Allocate stack space for all saved registers. */
20251 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20252 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20253 mem = gen_frame_mem (DImode, tmp);
20254 offset = 0;
20255 }
20256 else if (offset > 0)
20257 mem = gen_frame_mem (DImode,
20258 plus_constant (Pmode,
20259 stack_pointer_rtx,
20260 offset));
20261 else
20262 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20263
20264 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20265 RTX_FRAME_RELATED_P (tmp) = 1;
20266 tmp = emit_insn (tmp);
20267
20268 /* Record the first store insn. */
20269 if (dwarf_index == 1)
20270 insn = tmp;
20271
20272 /* Generate dwarf info. */
20273 mem = gen_frame_mem (SImode,
20274 plus_constant (Pmode,
20275 stack_pointer_rtx,
20276 offset));
20277 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20278 RTX_FRAME_RELATED_P (tmp) = 1;
20279 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20280
20281 mem = gen_frame_mem (SImode,
20282 plus_constant (Pmode,
20283 stack_pointer_rtx,
20284 offset + 4));
20285 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20286 RTX_FRAME_RELATED_P (tmp) = 1;
20287 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20288
20289 offset += 8;
20290 j += 2;
20291 }
20292 else
20293 {
20294 /* Emit a single word store. */
20295 if (offset < 0)
20296 {
20297 /* Allocate stack space for all saved registers. */
20298 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20299 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20300 mem = gen_frame_mem (SImode, tmp);
20301 offset = 0;
20302 }
20303 else if (offset > 0)
20304 mem = gen_frame_mem (SImode,
20305 plus_constant (Pmode,
20306 stack_pointer_rtx,
20307 offset));
20308 else
20309 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20310
20311 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20312 RTX_FRAME_RELATED_P (tmp) = 1;
20313 tmp = emit_insn (tmp);
20314
20315 /* Record the first store insn. */
20316 if (dwarf_index == 1)
20317 insn = tmp;
20318
20319 /* Generate dwarf info. */
20320 mem = gen_frame_mem (SImode,
20321 plus_constant (Pmode,
20322 stack_pointer_rtx,
20323 offset));
20324 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20325 RTX_FRAME_RELATED_P (tmp) = 1;
20326 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20327
20328 offset += 4;
20329 j += 1;
20330 }
20331 }
20332 else
20333 j++;
20334
20335 /* Attach dwarf info to the first insn we generate. */
20336 gcc_assert (insn != NULL_RTX);
20337 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20338 RTX_FRAME_RELATED_P (insn) = 1;
20339 }
20340
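/* For example (registers chosen arbitrarily), pushing {r4, r5, r7} with the
   routine above yields roughly

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   the first store allocating the space for all three registers.  */
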
20341 /* Generate and emit an insn that we will recognize as a push_multi.
20342 Unfortunately, since this insn does not reflect very well the actual
20343 semantics of the operation, we need to annotate the insn for the benefit
20344 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20345 MASK for registers that should be annotated for DWARF2 frame unwind
20346 information. */
20347 static rtx
20348 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20349 {
20350 int num_regs = 0;
20351 int num_dwarf_regs = 0;
20352 int i, j;
20353 rtx par;
20354 rtx dwarf;
20355 int dwarf_par_index;
20356 rtx tmp, reg;
20357
20358 /* We don't record the PC in the dwarf frame information. */
20359 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20360
20361 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20362 {
20363 if (mask & (1 << i))
20364 num_regs++;
20365 if (dwarf_regs_mask & (1 << i))
20366 num_dwarf_regs++;
20367 }
20368
20369 gcc_assert (num_regs && num_regs <= 16);
20370 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20371
20372 /* For the body of the insn we are going to generate an UNSPEC in
20373 parallel with several USEs. This allows the insn to be recognized
20374 by the push_multi pattern in the arm.md file.
20375
20376 The body of the insn looks something like this:
20377
20378 (parallel [
20379 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20380 (const_int:SI <num>)))
20381 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20382 (use (reg:SI XX))
20383 (use (reg:SI YY))
20384 ...
20385 ])
20386
20387 For the frame note however, we try to be more explicit and actually
20388 show each register being stored into the stack frame, plus a (single)
20389 decrement of the stack pointer. We do it this way in order to be
20390 friendly to the stack unwinding code, which only wants to see a single
20391 stack decrement per instruction. The RTL we generate for the note looks
20392 something like this:
20393
20394 (sequence [
20395 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20396 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20397 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20398 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20399 ...
20400 ])
20401
20402 FIXME: In an ideal world the PRE_MODIFY would not exist and
20403 instead we'd have a parallel expression detailing all
20404 the stores to the various memory addresses so that debug
20405 information is more up-to-date. Remember however while writing
20406 this to take care of the constraints with the push instruction.
20407
20408 Note also that this has to be taken care of for the VFP registers.
20409
20410 For more see PR43399. */
20411
20412 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20413 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20414 dwarf_par_index = 1;
20415
20416 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20417 {
20418 if (mask & (1 << i))
20419 {
20420 reg = gen_rtx_REG (SImode, i);
20421
20422 XVECEXP (par, 0, 0)
20423 = gen_rtx_SET (gen_frame_mem
20424 (BLKmode,
20425 gen_rtx_PRE_MODIFY (Pmode,
20426 stack_pointer_rtx,
20427 plus_constant
20428 (Pmode, stack_pointer_rtx,
20429 -4 * num_regs))
20430 ),
20431 gen_rtx_UNSPEC (BLKmode,
20432 gen_rtvec (1, reg),
20433 UNSPEC_PUSH_MULT));
20434
20435 if (dwarf_regs_mask & (1 << i))
20436 {
20437 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20438 reg);
20439 RTX_FRAME_RELATED_P (tmp) = 1;
20440 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20441 }
20442
20443 break;
20444 }
20445 }
20446
20447 for (j = 1, i++; j < num_regs; i++)
20448 {
20449 if (mask & (1 << i))
20450 {
20451 reg = gen_rtx_REG (SImode, i);
20452
20453 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20454
20455 if (dwarf_regs_mask & (1 << i))
20456 {
20457 tmp
20458 = gen_rtx_SET (gen_frame_mem
20459 (SImode,
20460 plus_constant (Pmode, stack_pointer_rtx,
20461 4 * j)),
20462 reg);
20463 RTX_FRAME_RELATED_P (tmp) = 1;
20464 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20465 }
20466
20467 j++;
20468 }
20469 }
20470
20471 par = emit_insn (par);
20472
20473 tmp = gen_rtx_SET (stack_pointer_rtx,
20474 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20476 XVECEXP (dwarf, 0, 0) = tmp;
20477
20478 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20479
20480 return par;
20481 }
20482
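/* The push_multi insn built above is ultimately printed as a single
   store-multiple; for MASK = {r4, r5, lr} this is roughly

	push	{r4, r5, lr}

   (stmfd sp!, {r4, r5, lr} in divided ARM syntax), while the attached
   REG_FRAME_RELATED_EXPR note describes the individual stores for the
   unwinder.  */
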
20483 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20484 SIZE is the offset to be adjusted.
20485 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20486 static void
20487 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20488 {
20489 rtx dwarf;
20490
20491 RTX_FRAME_RELATED_P (insn) = 1;
20492 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20493 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20494 }
20495
20496 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20497 SAVED_REGS_MASK shows which registers need to be restored.
20498
20499 Unfortunately, since this insn does not reflect very well the actual
20500 semantics of the operation, we need to annotate the insn for the benefit
20501 of DWARF2 frame unwind information. */
20502 static void
20503 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20504 {
20505 int num_regs = 0;
20506 int i, j;
20507 rtx par;
20508 rtx dwarf = NULL_RTX;
20509 rtx tmp, reg;
20510 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20511 int offset_adj;
20512 int emit_update;
20513
20514 offset_adj = return_in_pc ? 1 : 0;
20515 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20516 if (saved_regs_mask & (1 << i))
20517 num_regs++;
20518
20519 gcc_assert (num_regs && num_regs <= 16);
20520
20521 /* If SP is in the reglist, then we don't emit an SP update insn. */
20522 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20523
20524 /* The parallel needs to hold num_regs SETs
20525 and one SET for the stack update. */
20526 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20527
20528 if (return_in_pc)
20529 XVECEXP (par, 0, 0) = ret_rtx;
20530
20531 if (emit_update)
20532 {
20533 /* Increment the stack pointer, based on there being
20534 num_regs 4-byte registers to restore. */
20535 tmp = gen_rtx_SET (stack_pointer_rtx,
20536 plus_constant (Pmode,
20537 stack_pointer_rtx,
20538 4 * num_regs));
20539 RTX_FRAME_RELATED_P (tmp) = 1;
20540 XVECEXP (par, 0, offset_adj) = tmp;
20541 }
20542
20543 /* Now restore every reg, which may include PC. */
20544 for (j = 0, i = 0; j < num_regs; i++)
20545 if (saved_regs_mask & (1 << i))
20546 {
20547 reg = gen_rtx_REG (SImode, i);
20548 if ((num_regs == 1) && emit_update && !return_in_pc)
20549 {
20550 /* Emit single load with writeback. */
20551 tmp = gen_frame_mem (SImode,
20552 gen_rtx_POST_INC (Pmode,
20553 stack_pointer_rtx));
20554 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20555 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20556 return;
20557 }
20558
20559 tmp = gen_rtx_SET (reg,
20560 gen_frame_mem
20561 (SImode,
20562 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20563 RTX_FRAME_RELATED_P (tmp) = 1;
20564 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20565
20566 /* We need to maintain a sequence for DWARF info too. As dwarf info
20567 should not have PC, skip PC. */
20568 if (i != PC_REGNUM)
20569 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20570
20571 j++;
20572 }
20573
20574 if (return_in_pc)
20575 par = emit_jump_insn (par);
20576 else
20577 par = emit_insn (par);
20578
20579 REG_NOTES (par) = dwarf;
20580 if (!return_in_pc)
20581 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20582 stack_pointer_rtx, stack_pointer_rtx);
20583 }
20584
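/* For SAVED_REGS_MASK = {r4, r5, pc} the pop_multi built above is typically
   printed as

	pop	{r4, r5, pc}

   with one REG_CFA_RESTORE note per restored core register (PC excluded)
   and, when the return is not through PC, an additional REG_CFA_ADJUST_CFA
   note for the stack-pointer adjustment.  */
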
20585 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20586 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20587
20588 Unfortunately, since this insn does not reflect very well the actual
20589 semantics of the operation, we need to annotate the insn for the benefit
20590 of DWARF2 frame unwind information. */
20591 static void
20592 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20593 {
20594 int i, j;
20595 rtx par;
20596 rtx dwarf = NULL_RTX;
20597 rtx tmp, reg;
20598
20599 gcc_assert (num_regs && num_regs <= 32);
20600
20601 /* Workaround ARM10 VFPr1 bug. */
20602 if (num_regs == 2 && !arm_arch6)
20603 {
20604 if (first_reg == 15)
20605 first_reg--;
20606
20607 num_regs++;
20608 }
20609
20610 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20611 there could be up to 32 D-registers to restore.
20612 If there are more than 16 D-registers, make two recursive calls,
20613 each of which emits one pop_multi instruction. */
20614 if (num_regs > 16)
20615 {
20616 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20617 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20618 return;
20619 }
20620
20621 /* The parallel needs to hold num_regs SETs
20622 and one SET for the stack update. */
20623 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20624
20625 /* Increment the stack pointer, based on there being
20626 num_regs 8-byte registers to restore. */
20627 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20628 RTX_FRAME_RELATED_P (tmp) = 1;
20629 XVECEXP (par, 0, 0) = tmp;
20630
20631 /* Now show every reg that will be restored, using a SET for each. */
20632 for (j = 0, i=first_reg; j < num_regs; i += 2)
20633 {
20634 reg = gen_rtx_REG (DFmode, i);
20635
20636 tmp = gen_rtx_SET (reg,
20637 gen_frame_mem
20638 (DFmode,
20639 plus_constant (Pmode, base_reg, 8 * j)));
20640 RTX_FRAME_RELATED_P (tmp) = 1;
20641 XVECEXP (par, 0, j + 1) = tmp;
20642
20643 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20644
20645 j++;
20646 }
20647
20648 par = emit_insn (par);
20649 REG_NOTES (par) = dwarf;
20650
20651 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
20652 if (REGNO (base_reg) == IP_REGNUM)
20653 {
20654 RTX_FRAME_RELATED_P (par) = 1;
20655 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20656 }
20657 else
20658 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20659 base_reg, base_reg);
20660 }
20661
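/* Restoring d8-d11 relative to the stack pointer with the routine above is
   typically printed as something like

	vldm	sp!, {d8-d11}

   with a REG_CFA_RESTORE note attached for each D register.  */
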
20662 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20663 even number of registers is being popped, multiple LDRD patterns are created
20664 for all register pairs. If an odd number of registers is popped, the last
20665 register is loaded using an LDR pattern. */
20666 static void
20667 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20668 {
20669 int num_regs = 0;
20670 int i, j;
20671 rtx par = NULL_RTX;
20672 rtx dwarf = NULL_RTX;
20673 rtx tmp, reg, tmp1;
20674 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20675
20676 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20677 if (saved_regs_mask & (1 << i))
20678 num_regs++;
20679
20680 gcc_assert (num_regs && num_regs <= 16);
20681
20682 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20683 to be popped. So, if num_regs is even, now it will become odd,
20684 and we can generate pop with PC. If num_regs is odd, it will be
20685 even now, and ldr with return can be generated for PC. */
20686 if (return_in_pc)
20687 num_regs--;
20688
20689 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20690
20691 /* Var j iterates over all the registers to gather all the registers in
20692 saved_regs_mask. Var i gives the index of each saved register in the stack frame.
20693 A PARALLEL RTX of register-pair is created here, so that pattern for
20694 LDRD can be matched. As PC is always last register to be popped, and
20695 we have already decremented num_regs if PC, we don't have to worry
20696 about PC in this loop. */
20697 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20698 if (saved_regs_mask & (1 << j))
20699 {
20700 /* Create RTX for memory load. */
20701 reg = gen_rtx_REG (SImode, j);
20702 tmp = gen_rtx_SET (reg,
20703 gen_frame_mem (SImode,
20704 plus_constant (Pmode,
20705 stack_pointer_rtx, 4 * i)));
20706 RTX_FRAME_RELATED_P (tmp) = 1;
20707
20708 if (i % 2 == 0)
20709 {
20710 /* When saved-register index (i) is even, the RTX to be emitted is
20711 yet to be created. Hence create it first. The LDRD pattern we
20712 are generating is :
20713 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20714 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20715 where target registers need not be consecutive. */
20716 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20717 dwarf = NULL_RTX;
20718 }
20719
20720 /* The i-th register is added to the PARALLEL RTX. If i is even, reg_i is
20721 added as the 0th element; if i is odd, reg_i is added as the 1st element
20722 of the LDRD pattern shown above. */
20723 XVECEXP (par, 0, (i % 2)) = tmp;
20724 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20725
20726 if ((i % 2) == 1)
20727 {
20728 /* When saved-register index (i) is odd, RTXs for both the registers
20729 to be loaded are generated in above given LDRD pattern, and the
20730 pattern can be emitted now. */
20731 par = emit_insn (par);
20732 REG_NOTES (par) = dwarf;
20733 RTX_FRAME_RELATED_P (par) = 1;
20734 }
20735
20736 i++;
20737 }
20738
20739 /* If the number of registers popped is odd and return_in_pc is false, or
20740 the number of registers is even and return_in_pc is true, the last register
20741 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20742 then use LDR with post-increment. */
20743
20744 /* Increment the stack pointer, based on there being
20745 num_regs 4-byte registers to restore. */
20746 tmp = gen_rtx_SET (stack_pointer_rtx,
20747 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20748 RTX_FRAME_RELATED_P (tmp) = 1;
20749 tmp = emit_insn (tmp);
20750 if (!return_in_pc)
20751 {
20752 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20753 stack_pointer_rtx, stack_pointer_rtx);
20754 }
20755
20756 dwarf = NULL_RTX;
20757
20758 if (((num_regs % 2) == 1 && !return_in_pc)
20759 || ((num_regs % 2) == 0 && return_in_pc))
20760 {
20761 /* Scan for the single register to be popped. Skip until the saved
20762 register is found. */
20763 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20764
20765 /* Gen LDR with post increment here. */
20766 tmp1 = gen_rtx_MEM (SImode,
20767 gen_rtx_POST_INC (SImode,
20768 stack_pointer_rtx));
20769 set_mem_alias_set (tmp1, get_frame_alias_set ());
20770
20771 reg = gen_rtx_REG (SImode, j);
20772 tmp = gen_rtx_SET (reg, tmp1);
20773 RTX_FRAME_RELATED_P (tmp) = 1;
20774 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20775
20776 if (return_in_pc)
20777 {
20778 /* If return_in_pc, j must be PC_REGNUM. */
20779 gcc_assert (j == PC_REGNUM);
20780 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20781 XVECEXP (par, 0, 0) = ret_rtx;
20782 XVECEXP (par, 0, 1) = tmp;
20783 par = emit_jump_insn (par);
20784 }
20785 else
20786 {
20787 par = emit_insn (tmp);
20788 REG_NOTES (par) = dwarf;
20789 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20790 stack_pointer_rtx, stack_pointer_rtx);
20791 }
20792
20793 }
20794 else if ((num_regs % 2) == 1 && return_in_pc)
20795 {
20796 /* There are 2 registers to be popped. So, generate the pattern
20797 pop_multiple_with_stack_update_and_return to pop in PC. */
20798 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20799 }
20800
20801 return;
20802 }
20803
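/* For SAVED_REGS_MASK = {r4, r5, r6, pc} the routine above emits a sequence
   along the lines of

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

   the trailing pop being produced by arm_emit_multi_reg_pop.  */
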
20804 /* LDRD in ARM mode needs consecutive registers as operands. This function
20805 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20806 offset addressing and then generates one separate stack update. This provides
20807 more scheduling freedom, compared to writeback on every load. However,
20808 if the function returns using load into PC directly
20809 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20810 before the last load. TODO: Add a peephole optimization to recognize
20811 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20812 peephole optimization to merge the load at stack-offset zero
20813 with the stack update instruction using load with writeback
20814 in post-index addressing mode. */
20815 static void
20816 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20817 {
20818 int j = 0;
20819 int offset = 0;
20820 rtx par = NULL_RTX;
20821 rtx dwarf = NULL_RTX;
20822 rtx tmp, mem;
20823
20824 /* Restore saved registers. */
20825 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20826 j = 0;
20827 while (j <= LAST_ARM_REGNUM)
20828 if (saved_regs_mask & (1 << j))
20829 {
20830 if ((j % 2) == 0
20831 && (saved_regs_mask & (1 << (j + 1)))
20832 && (j + 1) != PC_REGNUM)
20833 {
20834 /* Current register and next register form register pair for which
20835 LDRD can be generated. PC is always the last register popped, and
20836 we handle it separately. */
20837 if (offset > 0)
20838 mem = gen_frame_mem (DImode,
20839 plus_constant (Pmode,
20840 stack_pointer_rtx,
20841 offset));
20842 else
20843 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20844
20845 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20846 tmp = emit_insn (tmp);
20847 RTX_FRAME_RELATED_P (tmp) = 1;
20848
20849 /* Generate dwarf info. */
20850
20851 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20852 gen_rtx_REG (SImode, j),
20853 NULL_RTX);
20854 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20855 gen_rtx_REG (SImode, j + 1),
20856 dwarf);
20857
20858 REG_NOTES (tmp) = dwarf;
20859
20860 offset += 8;
20861 j += 2;
20862 }
20863 else if (j != PC_REGNUM)
20864 {
20865 /* Emit a single word load. */
20866 if (offset > 0)
20867 mem = gen_frame_mem (SImode,
20868 plus_constant (Pmode,
20869 stack_pointer_rtx,
20870 offset));
20871 else
20872 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20873
20874 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20875 tmp = emit_insn (tmp);
20876 RTX_FRAME_RELATED_P (tmp) = 1;
20877
20878 /* Generate dwarf info. */
20879 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20880 gen_rtx_REG (SImode, j),
20881 NULL_RTX);
20882
20883 offset += 4;
20884 j += 1;
20885 }
20886 else /* j == PC_REGNUM */
20887 j++;
20888 }
20889 else
20890 j++;
20891
20892 /* Update the stack. */
20893 if (offset > 0)
20894 {
20895 tmp = gen_rtx_SET (stack_pointer_rtx,
20896 plus_constant (Pmode,
20897 stack_pointer_rtx,
20898 offset));
20899 tmp = emit_insn (tmp);
20900 arm_add_cfa_adjust_cfa_note (tmp, offset,
20901 stack_pointer_rtx, stack_pointer_rtx);
20902 offset = 0;
20903 }
20904
20905 if (saved_regs_mask & (1 << PC_REGNUM))
20906 {
20907 /* Only PC is to be popped. */
20908 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20909 XVECEXP (par, 0, 0) = ret_rtx;
20910 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20911 gen_frame_mem (SImode,
20912 gen_rtx_POST_INC (SImode,
20913 stack_pointer_rtx)));
20914 RTX_FRAME_RELATED_P (tmp) = 1;
20915 XVECEXP (par, 0, 1) = tmp;
20916 par = emit_jump_insn (par);
20917
20918 /* Generate dwarf info. */
20919 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20920 gen_rtx_REG (SImode, PC_REGNUM),
20921 NULL_RTX);
20922 REG_NOTES (par) = dwarf;
20923 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20924 stack_pointer_rtx, stack_pointer_rtx);
20925 }
20926 }
20927
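/* For SAVED_REGS_MASK = {r4, r5, r6, pc} the ARM-mode routine above emits a
   sequence along the lines of

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   with the single stack update placed before the final load into PC.  */
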
20928 /* Calculate the size of the return value that is passed in registers. */
20929 static unsigned
20930 arm_size_return_regs (void)
20931 {
20932 machine_mode mode;
20933
20934 if (crtl->return_rtx != 0)
20935 mode = GET_MODE (crtl->return_rtx);
20936 else
20937 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20938
20939 return GET_MODE_SIZE (mode);
20940 }
20941
20942 /* Return true if the current function needs to save/restore LR. */
20943 static bool
20944 thumb_force_lr_save (void)
20945 {
20946 return !cfun->machine->lr_save_eliminated
20947 && (!crtl->is_leaf
20948 || thumb_far_jump_used_p ()
20949 || df_regs_ever_live_p (LR_REGNUM));
20950 }
20951
20952 /* Return true if CALL is an indirect tail call. In that case we do
20953 not know whether r3 will be available, so we must conservatively
20954 assume that it could be used. */
20955 static bool
20956 is_indirect_tailcall_p (rtx call)
20957 {
20958 rtx pat = PATTERN (call);
20959
20960 /* Indirect tail call. */
20961 pat = XVECEXP (pat, 0, 0);
20962 if (GET_CODE (pat) == SET)
20963 pat = SET_SRC (pat);
20964
20965 pat = XEXP (XEXP (pat, 0), 0);
20966 return REG_P (pat);
20967 }
20968
20969 /* Return true if r3 is used by any of the tail call insns in the
20970 current function. */
20971 static bool
20972 any_sibcall_could_use_r3 (void)
20973 {
20974 edge_iterator ei;
20975 edge e;
20976
20977 if (!crtl->tail_call_emit)
20978 return false;
20979 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20980 if (e->flags & EDGE_SIBCALL)
20981 {
20982 rtx_insn *call = BB_END (e->src);
20983 if (!CALL_P (call))
20984 call = prev_nonnote_nondebug_insn (call);
20985 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20986 if (find_regno_fusage (call, USE, 3)
20987 || is_indirect_tailcall_p (call))
20988 return true;
20989 }
20990 return false;
20991 }
20992
20993
20994 /* Compute the distance from register FROM to register TO.
20995 These can be the arg pointer (26), the soft frame pointer (25),
20996 the stack pointer (13) or the hard frame pointer (11).
20997 In Thumb mode r7 is used as the hard frame pointer, if needed.
20998 Typical stack layout looks like this:
20999
21000 old stack pointer -> | |
21001 ----
21002 | | \
21003 | | saved arguments for
21004 | | vararg functions
21005 | | /
21006 --
21007 hard FP & arg pointer -> | | \
21008 | | stack
21009 | | frame
21010 | | /
21011 --
21012 | | \
21013 | | call saved
21014 | | registers
21015 soft frame pointer -> | | /
21016 --
21017 | | \
21018 | | local
21019 | | variables
21020 locals base pointer -> | | /
21021 --
21022 | | \
21023 | | outgoing
21024 | | arguments
21025 current stack pointer -> | | /
21026 --
21027
21028 For a given function some or all of these stack components
21029 may not be needed, giving rise to the possibility of
21030 eliminating some of the registers.
21031
21032 The values returned by this function must reflect the behavior
21033 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21034
21035 The sign of the number returned reflects the direction of stack
21036 growth, so the values are positive for all eliminations except
21037 from the soft frame pointer to the hard frame pointer.
21038
21039 SFP may point just inside the local variables block to ensure correct
21040 alignment. */
21041
21042
21043 /* Return cached stack offsets. */
21044
21045 static arm_stack_offsets *
21046 arm_get_frame_offsets (void)
21047 {
21048 struct arm_stack_offsets *offsets;
21049
21050 offsets = &cfun->machine->stack_offsets;
21051
21052 return offsets;
21053 }
21054
21055
21056 /* Calculate stack offsets. These are used to calculate register elimination
21057 offsets and in prologue/epilogue code. Also calculates which registers
21058 should be saved. */
21059
21060 static void
21061 arm_compute_frame_layout (void)
21062 {
21063 struct arm_stack_offsets *offsets;
21064 unsigned long func_type;
21065 int saved;
21066 int core_saved;
21067 HOST_WIDE_INT frame_size;
21068 int i;
21069
21070 offsets = &cfun->machine->stack_offsets;
21071
21072 /* Initially this is the size of the local variables. It will be translated
21073 into an offset once we have determined the size of preceding data. */
21074 frame_size = ROUND_UP_WORD (get_frame_size ());
21075
21076 /* Space for variadic functions. */
21077 offsets->saved_args = crtl->args.pretend_args_size;
21078
21079 /* In Thumb mode this is incorrect, but never used. */
21080 offsets->frame
21081 = (offsets->saved_args
21082 + arm_compute_static_chain_stack_bytes ()
21083 + (frame_pointer_needed ? 4 : 0));
21084
21085 if (TARGET_32BIT)
21086 {
21087 unsigned int regno;
21088
21089 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21090 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21091 saved = core_saved;
21092
21093 /* We know that SP will be doubleword aligned on entry, and we must
21094 preserve that condition at any subroutine call. We also require the
21095 soft frame pointer to be doubleword aligned. */
21096
21097 if (TARGET_REALLY_IWMMXT)
21098 {
21099 /* Check for the call-saved iWMMXt registers. */
21100 for (regno = FIRST_IWMMXT_REGNUM;
21101 regno <= LAST_IWMMXT_REGNUM;
21102 regno++)
21103 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21104 saved += 8;
21105 }
21106
21107 func_type = arm_current_func_type ();
21108 /* Space for saved VFP registers. */
21109 if (! IS_VOLATILE (func_type)
21110 && TARGET_HARD_FLOAT)
21111 saved += arm_get_vfp_saved_size ();
21112 }
21113 else /* TARGET_THUMB1 */
21114 {
21115 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21116 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21117 saved = core_saved;
21118 if (TARGET_BACKTRACE)
21119 saved += 16;
21120 }
21121
21122 /* Saved registers include the stack frame. */
21123 offsets->saved_regs
21124 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21125 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21126
21127 /* A leaf function does not need any stack alignment if it has nothing
21128 on the stack. */
21129 if (crtl->is_leaf && frame_size == 0
21130 /* However if it calls alloca(), we have a dynamically allocated
21131 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21132 && ! cfun->calls_alloca)
21133 {
21134 offsets->outgoing_args = offsets->soft_frame;
21135 offsets->locals_base = offsets->soft_frame;
21136 return;
21137 }
21138
21139 /* Ensure SFP has the correct alignment. */
21140 if (ARM_DOUBLEWORD_ALIGN
21141 && (offsets->soft_frame & 7))
21142 {
21143 offsets->soft_frame += 4;
21144 /* Try to align stack by pushing an extra reg. Don't bother doing this
21145 when there is a stack frame as the alignment will be rolled into
21146 the normal stack adjustment. */
21147 if (frame_size + crtl->outgoing_args_size == 0)
21148 {
21149 int reg = -1;
21150
21151 /* Register r3 is caller-saved. Normally it does not need to be
21152 saved on entry by the prologue. However if we choose to save
21153 it for padding then we may confuse the compiler into thinking
21154 a prologue sequence is required when in fact it is not. This
21155 will occur when shrink-wrapping if r3 is used as a scratch
21156 register and there are no other callee-saved writes.
21157
21158 This situation can be avoided when other callee-saved registers
21159 are available and r3 is not mandatory if we choose a callee-saved
21160 register for padding. */
21161 bool prefer_callee_reg_p = false;
21162
21163 /* If it is safe to use r3, then do so. This sometimes
21164 generates better code on Thumb-2 by avoiding the need to
21165 use 32-bit push/pop instructions. */
21166 if (! any_sibcall_could_use_r3 ()
21167 && arm_size_return_regs () <= 12
21168 && (offsets->saved_regs_mask & (1 << 3)) == 0
21169 && (TARGET_THUMB2
21170 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21171 {
21172 reg = 3;
21173 if (!TARGET_THUMB2)
21174 prefer_callee_reg_p = true;
21175 }
21176 if (reg == -1
21177 || prefer_callee_reg_p)
21178 {
21179 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21180 {
21181 /* Avoid fixed registers; they may be changed at
21182 arbitrary times so it's unsafe to restore them
21183 during the epilogue. */
21184 if (!fixed_regs[i]
21185 && (offsets->saved_regs_mask & (1 << i)) == 0)
21186 {
21187 reg = i;
21188 break;
21189 }
21190 }
21191 }
21192
21193 if (reg != -1)
21194 {
21195 offsets->saved_regs += 4;
21196 offsets->saved_regs_mask |= (1 << reg);
21197 }
21198 }
21199 }
21200
21201 offsets->locals_base = offsets->soft_frame + frame_size;
21202 offsets->outgoing_args = (offsets->locals_base
21203 + crtl->outgoing_args_size);
21204
21205 if (ARM_DOUBLEWORD_ALIGN)
21206 {
21207 /* Ensure SP remains doubleword aligned. */
21208 if (offsets->outgoing_args & 7)
21209 offsets->outgoing_args += 4;
21210 gcc_assert (!(offsets->outgoing_args & 7));
21211 }
21212 }
21213
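/* As a worked example (assuming double-word stack alignment, no static
   chain, no frame pointer and an empty caller-interworking slot), a
   function with 8 bytes of locals, no outgoing arguments and saved core
   registers {r4-r7, lr} ends up with roughly

	saved_args    = 0
	frame         = 0
	saved_regs    = 20	(five 4-byte core registers)
	soft_frame    = 24	(20 rounded up to 8-byte alignment)
	locals_base   = 32
	outgoing_args = 32

   all values in bytes.  */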
21214
21215 /* Calculate the relative offsets for the different stack pointers. Positive
21216 offsets are in the direction of stack growth. */
21217
21218 HOST_WIDE_INT
21219 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21220 {
21221 arm_stack_offsets *offsets;
21222
21223 offsets = arm_get_frame_offsets ();
21224
21225 /* OK, now we have enough information to compute the distances.
21226 There must be an entry in these switch tables for each pair
21227 of registers in ELIMINABLE_REGS, even if some of the entries
21228 seem to be redundant or useless. */
21229 switch (from)
21230 {
21231 case ARG_POINTER_REGNUM:
21232 switch (to)
21233 {
21234 case THUMB_HARD_FRAME_POINTER_REGNUM:
21235 return 0;
21236
21237 case FRAME_POINTER_REGNUM:
21238 /* This is the reverse of the soft frame pointer
21239 to hard frame pointer elimination below. */
21240 return offsets->soft_frame - offsets->saved_args;
21241
21242 case ARM_HARD_FRAME_POINTER_REGNUM:
21243 /* This is only non-zero in the case where the static chain register
21244 is stored above the frame. */
21245 return offsets->frame - offsets->saved_args - 4;
21246
21247 case STACK_POINTER_REGNUM:
21248 /* If nothing has been pushed on the stack at all
21249 then this will return -4. This *is* correct! */
21250 return offsets->outgoing_args - (offsets->saved_args + 4);
21251
21252 default:
21253 gcc_unreachable ();
21254 }
21255 gcc_unreachable ();
21256
21257 case FRAME_POINTER_REGNUM:
21258 switch (to)
21259 {
21260 case THUMB_HARD_FRAME_POINTER_REGNUM:
21261 return 0;
21262
21263 case ARM_HARD_FRAME_POINTER_REGNUM:
21264 /* The hard frame pointer points to the top entry in the
21265 stack frame. The soft frame pointer to the bottom entry
21266 in the stack frame. If there is no stack frame at all,
21267 then they are identical. */
21268
21269 return offsets->frame - offsets->soft_frame;
21270
21271 case STACK_POINTER_REGNUM:
21272 return offsets->outgoing_args - offsets->soft_frame;
21273
21274 default:
21275 gcc_unreachable ();
21276 }
21277 gcc_unreachable ();
21278
21279 default:
21280 /* You cannot eliminate from the stack pointer.
21281 In theory you could eliminate from the hard frame
21282 pointer to the stack pointer, but this will never
21283 happen, since if a stack frame is not needed the
21284 hard frame pointer will never be used. */
21285 gcc_unreachable ();
21286 }
21287 }
21288
21289 /* Given FROM and TO register numbers, say whether this elimination is
21290 allowed. Frame pointer elimination is automatically handled.
21291
21292 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21293 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21294 pointer, we must eliminate FRAME_POINTER_REGNUM into
21295 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21296 ARG_POINTER_REGNUM. */
21297
21298 bool
21299 arm_can_eliminate (const int from, const int to)
21300 {
21301 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21302 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21303 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21304 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21305 true);
21306 }
21307
21308 /* Emit RTL to save coprocessor registers on function entry. Returns the
21309 number of bytes pushed. */
21310
21311 static int
21312 arm_save_coproc_regs(void)
21313 {
21314 int saved_size = 0;
21315 unsigned reg;
21316 unsigned start_reg;
21317 rtx insn;
21318
21319 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21320 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21321 {
21322 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21323 insn = gen_rtx_MEM (V2SImode, insn);
21324 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21325 RTX_FRAME_RELATED_P (insn) = 1;
21326 saved_size += 8;
21327 }
21328
21329 if (TARGET_HARD_FLOAT)
21330 {
21331 start_reg = FIRST_VFP_REGNUM;
21332
21333 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21334 {
21335 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21336 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21337 {
21338 if (start_reg != reg)
21339 saved_size += vfp_emit_fstmd (start_reg,
21340 (reg - start_reg) / 2);
21341 start_reg = reg + 2;
21342 }
21343 }
21344 if (start_reg != reg)
21345 saved_size += vfp_emit_fstmd (start_reg,
21346 (reg - start_reg) / 2);
21347 }
21348 return saved_size;
21349 }
21350
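/* On a hard-float target where d8-d11 are live across the function, the VFP
   loop above typically emits a single block store such as

	vpush	{d8-d11}

   (via vfp_emit_fstmd), accounting for 32 bytes of saved coprocessor
   state.  */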
21351
21352 /* Set the Thumb frame pointer from the stack pointer. */
21353
21354 static void
21355 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21356 {
21357 HOST_WIDE_INT amount;
21358 rtx insn, dwarf;
21359
21360 amount = offsets->outgoing_args - offsets->locals_base;
21361 if (amount < 1024)
21362 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21363 stack_pointer_rtx, GEN_INT (amount)));
21364 else
21365 {
21366 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21367 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21368 expects the first two operands to be the same. */
21369 if (TARGET_THUMB2)
21370 {
21371 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21372 stack_pointer_rtx,
21373 hard_frame_pointer_rtx));
21374 }
21375 else
21376 {
21377 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21378 hard_frame_pointer_rtx,
21379 stack_pointer_rtx));
21380 }
21381 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21382 plus_constant (Pmode, stack_pointer_rtx, amount));
21383 RTX_FRAME_RELATED_P (dwarf) = 1;
21384 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21385 }
21386
21387 RTX_FRAME_RELATED_P (insn) = 1;
21388 }
21389
21390 struct scratch_reg {
21391 rtx reg;
21392 bool saved;
21393 };
21394
21395 /* Return a short-lived scratch register for use as a 2nd scratch register on
21396 function entry after the registers are saved in the prologue. This register
21397 must be released by means of release_scratch_register_on_entry. IP is not
21398 considered since it is always used as the 1st scratch register if available.
21399
21400 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21401 mask of live registers. */
21402
21403 static void
21404 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21405 unsigned long live_regs)
21406 {
21407 int regno = -1;
21408
21409 sr->saved = false;
21410
21411 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21412 regno = LR_REGNUM;
21413 else
21414 {
21415 unsigned int i;
21416
21417 for (i = 4; i < 11; i++)
21418 if (regno1 != i && (live_regs & (1 << i)) != 0)
21419 {
21420 regno = i;
21421 break;
21422 }
21423
21424 if (regno < 0)
21425 {
21426 /* If IP is used as the 1st scratch register for a nested function,
21427 then either r3 wasn't available or is used to preserve IP. */
21428 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21429 regno1 = 3;
21430 regno = (regno1 == 3 ? 2 : 3);
21431 sr->saved
21432 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21433 regno);
21434 }
21435 }
21436
21437 sr->reg = gen_rtx_REG (SImode, regno);
21438 if (sr->saved)
21439 {
21440 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21441 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21442 rtx x = gen_rtx_SET (stack_pointer_rtx,
21443 plus_constant (Pmode, stack_pointer_rtx, -4));
21444 RTX_FRAME_RELATED_P (insn) = 1;
21445 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21446 }
21447 }
21448
21449 /* Release a scratch register obtained from the preceding function. */
21450
21451 static void
21452 release_scratch_register_on_entry (struct scratch_reg *sr)
21453 {
21454 if (sr->saved)
21455 {
21456 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21457 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21458 rtx x = gen_rtx_SET (stack_pointer_rtx,
21459 plus_constant (Pmode, stack_pointer_rtx, 4));
21460 RTX_FRAME_RELATED_P (insn) = 1;
21461 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21462 }
21463 }
21464
21465 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21466
21467 #if PROBE_INTERVAL > 4096
21468 #error Cannot use indexed addressing mode for stack probing
21469 #endif
21470
21471 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21472 inclusive. These are offsets from the current stack pointer. REGNO1
21473 is the index number of the 1st scratch register and LIVE_REGS is the
21474 mask of live registers. */
21475
21476 static void
21477 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21478 unsigned int regno1, unsigned long live_regs)
21479 {
21480 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21481
21482 /* See if we have a constant small number of probes to generate. If so,
21483 that's the easy case. */
21484 if (size <= PROBE_INTERVAL)
21485 {
21486 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21487 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21488 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21489 }
21490
21491 /* The run-time loop is made up of 10 insns in the generic case while the
21492 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals.  */
21493 else if (size <= 5 * PROBE_INTERVAL)
21494 {
21495 HOST_WIDE_INT i, rem;
21496
21497 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21498 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21499 emit_stack_probe (reg1);
21500
21501 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21502 it exceeds SIZE. If only two probes are needed, this will not
21503 generate any code. Then probe at FIRST + SIZE. */
21504 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21505 {
21506 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21507 emit_stack_probe (reg1);
21508 }
21509
21510 rem = size - (i - PROBE_INTERVAL);
21511 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21512 {
21513 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21514 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21515 }
21516 else
21517 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21518 }
21519
21520 /* Otherwise, do the same as above, but in a loop. Note that we must be
21521 extra careful with variables wrapping around because we might be at
21522 the very top (or the very bottom) of the address space and we have
21523 to be able to handle this case properly; in particular, we use an
21524 equality test for the loop condition. */
21525 else
21526 {
21527 HOST_WIDE_INT rounded_size;
21528 struct scratch_reg sr;
21529
21530 get_scratch_register_on_entry (&sr, regno1, live_regs);
21531
21532 emit_move_insn (reg1, GEN_INT (first));
21533
21534
21535 /* Step 1: round SIZE to the previous multiple of the interval. */
21536
21537 rounded_size = size & -PROBE_INTERVAL;
21538 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21539
21540
21541 /* Step 2: compute initial and final value of the loop counter. */
21542
21543 /* TEST_ADDR = SP + FIRST. */
21544 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21545
21546 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21547 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21548
21549
21550 /* Step 3: the loop
21551
21552 do
21553 {
21554 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21555 probe at TEST_ADDR
21556 }
21557 while (TEST_ADDR != LAST_ADDR)
21558
21559 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21560 until it is equal to ROUNDED_SIZE. */
21561
21562 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21563
21564
21565 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21566 that SIZE is equal to ROUNDED_SIZE. */
21567
21568 if (size != rounded_size)
21569 {
21570 HOST_WIDE_INT rem = size - rounded_size;
21571
21572 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21573 {
21574 emit_set_insn (sr.reg,
21575 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21576 emit_stack_probe (plus_constant (Pmode, sr.reg,
21577 PROBE_INTERVAL - rem));
21578 }
21579 else
21580 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21581 }
21582
21583 release_scratch_register_on_entry (&sr);
21584 }
21585
21586 /* Make sure nothing is scheduled before we are done. */
21587 emit_insn (gen_blockage ());
21588 }
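/* Worked example (illustrative, assuming PROBE_INTERVAL is 4096 and ARM
   state): for FIRST == 4096 and SIZE == 8192 the second case above emits,
   in effect,

	mov	reg1, #8192		@ FIRST + PROBE_INTERVAL
	sub	reg1, sp, reg1
	<probe at reg1>			@ SP - 8192
	sub	reg1, reg1, #4096
	<probe at reg1>			@ SP - 12288 == SP - (FIRST + SIZE)

   where each probe is a store emitted by emit_stack_probe.  Larger sizes
   fall through to the loop case, which uses the scratch register helpers
   above.  */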
21589
21590 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21591 absolute addresses. */
21592
21593 const char *
21594 output_probe_stack_range (rtx reg1, rtx reg2)
21595 {
21596 static int labelno = 0;
21597 char loop_lab[32];
21598 rtx xops[2];
21599
21600 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21601
21602 /* Loop. */
21603 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21604
21605 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21606 xops[0] = reg1;
21607 xops[1] = GEN_INT (PROBE_INTERVAL);
21608 output_asm_insn ("sub\t%0, %0, %1", xops);
21609
21610 /* Probe at TEST_ADDR. */
21611 output_asm_insn ("str\tr0, [%0, #0]", xops);
21612
21613 /* Test if TEST_ADDR == LAST_ADDR. */
21614 xops[1] = reg2;
21615 output_asm_insn ("cmp\t%0, %1", xops);
21616
21617 /* Branch. */
21618 fputs ("\tbne\t", asm_out_file);
21619 assemble_name_raw (asm_out_file, loop_lab);
21620 fputc ('\n', asm_out_file);
21621
21622 return "";
21623 }
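/* Illustrative output of the routine above (assuming PROBE_INTERVAL is 4096,
   with r4 and r5 standing in for REG1 and REG2), typically something like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0

   i.e. a descending probe loop that stops once TEST_ADDR equals
   LAST_ADDR.  */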
21624
21625 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21626 function. */
21627 void
21628 arm_expand_prologue (void)
21629 {
21630 rtx amount;
21631 rtx insn;
21632 rtx ip_rtx;
21633 unsigned long live_regs_mask;
21634 unsigned long func_type;
21635 int fp_offset = 0;
21636 int saved_pretend_args = 0;
21637 int saved_regs = 0;
21638 unsigned HOST_WIDE_INT args_to_push;
21639 HOST_WIDE_INT size;
21640 arm_stack_offsets *offsets;
21641 bool clobber_ip;
21642
21643 func_type = arm_current_func_type ();
21644
21645 /* Naked functions don't have prologues. */
21646 if (IS_NAKED (func_type))
21647 {
21648 if (flag_stack_usage_info)
21649 current_function_static_stack_size = 0;
21650 return;
21651 }
21652
21653 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21654 args_to_push = crtl->args.pretend_args_size;
21655
21656 /* Compute which registers we will have to save onto the stack.  */
21657 offsets = arm_get_frame_offsets ();
21658 live_regs_mask = offsets->saved_regs_mask;
21659
21660 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21661
21662 if (IS_STACKALIGN (func_type))
21663 {
21664 rtx r0, r1;
21665
21666 /* Handle a word-aligned stack pointer. We generate the following:
21667
21668 mov r0, sp
21669 bic r1, r0, #7
21670 mov sp, r1
21671 <save and restore r0 in normal prologue/epilogue>
21672 mov sp, r0
21673 bx lr
21674
21675 The unwinder doesn't need to know about the stack realignment.
21676 Just tell it we saved SP in r0. */
21677 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21678
21679 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21680 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21681
21682 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21683 RTX_FRAME_RELATED_P (insn) = 1;
21684 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21685
21686 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21687
21688 /* ??? The CFA changes here, which may cause GDB to conclude that it
21689 has entered a different function. That said, the unwind info is
21690 correct, individually, before and after this instruction because
21691 we've described the save of SP, which will override the default
21692 handling of SP as restoring from the CFA. */
21693 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21694 }
21695
21696 /* Let's compute the static_chain_stack_bytes required and store it. Right
21697 now the value must be -1 as stored by arm_init_machine_status (). */
21698 cfun->machine->static_chain_stack_bytes
21699 = arm_compute_static_chain_stack_bytes ();
21700
21701 /* The static chain register is the same as the IP register. If it is
21702 clobbered when creating the frame, we need to save and restore it. */
21703 clobber_ip = IS_NESTED (func_type)
21704 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21705 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21706 || flag_stack_clash_protection)
21707 && !df_regs_ever_live_p (LR_REGNUM)
21708 && arm_r3_live_at_start_p ()));
21709
21710 /* Find somewhere to store IP whilst the frame is being created.
21711 We try the following places in order:
21712
21713 1. The last argument register r3 if it is available.
21714 2. A slot on the stack above the frame if there are no
21715 arguments to push onto the stack.
21716 3. Register r3 again, after pushing the argument registers
21717 onto the stack, if this is a varargs function.
21718 4. The last slot on the stack created for the arguments to
21719 push, if this isn't a varargs function.
21720
21721 Note - we only need to tell the dwarf2 backend about the SP
21722 adjustment in the second variant; the static chain register
21723 doesn't need to be unwound, as it doesn't contain a value
21724 inherited from the caller. */
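  /* For example (illustrative), in case 2 above the save amounts to a single
     pre-decrement store
	 str	ip, [sp, #-4]!
     with the unwind info recording only the 4-byte SP adjustment, as emitted
     by the args_to_push == 0 branch below.  */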
21725 if (clobber_ip)
21726 {
21727 if (!arm_r3_live_at_start_p ())
21728 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21729 else if (args_to_push == 0)
21730 {
21731 rtx addr, dwarf;
21732
21733 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21734 saved_regs += 4;
21735
21736 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21737 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21738 fp_offset = 4;
21739
21740 /* Just tell the dwarf backend that we adjusted SP. */
21741 dwarf = gen_rtx_SET (stack_pointer_rtx,
21742 plus_constant (Pmode, stack_pointer_rtx,
21743 -fp_offset));
21744 RTX_FRAME_RELATED_P (insn) = 1;
21745 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21746 }
21747 else
21748 {
21749 /* Store the args on the stack. */
21750 if (cfun->machine->uses_anonymous_args)
21751 {
21752 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21753 (0xf0 >> (args_to_push / 4)) & 0xf);
21754 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21755 saved_pretend_args = 1;
21756 }
21757 else
21758 {
21759 rtx addr, dwarf;
21760
21761 if (args_to_push == 4)
21762 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21763 else
21764 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21765 plus_constant (Pmode,
21766 stack_pointer_rtx,
21767 -args_to_push));
21768
21769 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21770
21771 /* Just tell the dwarf backend that we adjusted SP. */
21772 dwarf = gen_rtx_SET (stack_pointer_rtx,
21773 plus_constant (Pmode, stack_pointer_rtx,
21774 -args_to_push));
21775 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21776 }
21777
21778 RTX_FRAME_RELATED_P (insn) = 1;
21779 fp_offset = args_to_push;
21780 args_to_push = 0;
21781 }
21782 }
21783
21784 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21785 {
21786 if (IS_INTERRUPT (func_type))
21787 {
21788 /* Interrupt functions must not corrupt any registers.
21789 Creating a frame pointer however, corrupts the IP
21790 register, so we must push it first. */
21791 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21792
21793 /* Do not set RTX_FRAME_RELATED_P on this insn.
21794 The dwarf stack unwinding code only wants to see one
21795 stack decrement per function, and this is not it. If
21796 this instruction is labeled as being part of the frame
21797 creation sequence then dwarf2out_frame_debug_expr will
21798 die when it encounters the assignment of IP to FP
21799 later on, since the use of SP here establishes SP as
21800 the CFA register and not IP.
21801
21802 Anyway this instruction is not really part of the stack
21803 frame creation although it is part of the prologue. */
21804 }
21805
21806 insn = emit_set_insn (ip_rtx,
21807 plus_constant (Pmode, stack_pointer_rtx,
21808 fp_offset));
21809 RTX_FRAME_RELATED_P (insn) = 1;
21810 }
21811
21812 if (args_to_push)
21813 {
21814 /* Push the argument registers, or reserve space for them. */
21815 if (cfun->machine->uses_anonymous_args)
21816 insn = emit_multi_reg_push
21817 ((0xf0 >> (args_to_push / 4)) & 0xf,
21818 (0xf0 >> (args_to_push / 4)) & 0xf);
21819 else
21820 insn = emit_insn
21821 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21822 GEN_INT (- args_to_push)));
21823 RTX_FRAME_RELATED_P (insn) = 1;
21824 }
21825
21826 /* If this is an interrupt service routine, and the link register
21827 is going to be pushed, and we're not generating the extra
21828 push of IP (needed when a frame pointer is required and the APCS
21829 frame layout is used), then subtracting four from LR now means that
21830 the function return can be done with a single instruction. */
21831 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21832 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21833 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21834 && TARGET_ARM)
21835 {
21836 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21837
21838 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21839 }
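  /* Illustrative effect of the adjustment above: the hardware return address
     for an IRQ/FIQ is LR - 4, so with LR pre-adjusted the value pushed below
     already points at the resume address and the epilogue can return with a
     single instruction such as
	 ldmfd	sp!, {..., pc}^
     instead of correcting LR separately (a sketch only; the actual epilogue
     is generated elsewhere).  */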
21840
21841 if (live_regs_mask)
21842 {
21843 unsigned long dwarf_regs_mask = live_regs_mask;
21844
21845 saved_regs += bit_count (live_regs_mask) * 4;
21846 if (optimize_size && !frame_pointer_needed
21847 && saved_regs == offsets->saved_regs - offsets->saved_args)
21848 {
21849 /* If no coprocessor registers are being pushed and we don't have
21850 to worry about a frame pointer then push extra registers to
21851 create the stack frame. This is done in a way that does not
21852 alter the frame layout, so is independent of the epilogue. */
21853 int n;
21854 int frame;
21855 n = 0;
21856 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21857 n++;
21858 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21859 if (frame && n * 4 >= frame)
21860 {
21861 n = frame / 4;
21862 live_regs_mask |= (1 << n) - 1;
21863 saved_regs += frame;
21864 }
21865 }
21866
21867 if (TARGET_LDRD
21868 && current_tune->prefer_ldrd_strd
21869 && !optimize_function_for_size_p (cfun))
21870 {
21871 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21872 if (TARGET_THUMB2)
21873 thumb2_emit_strd_push (live_regs_mask);
21874 else if (TARGET_ARM
21875 && !TARGET_APCS_FRAME
21876 && !IS_INTERRUPT (func_type))
21877 arm_emit_strd_push (live_regs_mask);
21878 else
21879 {
21880 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21881 RTX_FRAME_RELATED_P (insn) = 1;
21882 }
21883 }
21884 else
21885 {
21886 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21887 RTX_FRAME_RELATED_P (insn) = 1;
21888 }
21889 }
21890
21891 if (! IS_VOLATILE (func_type))
21892 saved_regs += arm_save_coproc_regs ();
21893
21894 if (frame_pointer_needed && TARGET_ARM)
21895 {
21896 /* Create the new frame pointer. */
21897 if (TARGET_APCS_FRAME)
21898 {
21899 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21900 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21901 RTX_FRAME_RELATED_P (insn) = 1;
21902 }
21903 else
21904 {
21905 insn = GEN_INT (saved_regs - (4 + fp_offset));
21906 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21907 stack_pointer_rtx, insn));
21908 RTX_FRAME_RELATED_P (insn) = 1;
21909 }
21910 }
21911
21912 size = offsets->outgoing_args - offsets->saved_args;
21913 if (flag_stack_usage_info)
21914 current_function_static_stack_size = size;
21915
21916 /* If this isn't an interrupt service routine and we have a frame, then do
21917 stack checking. We use IP as the first scratch register, except for the
21918 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21919 if (!IS_INTERRUPT (func_type)
21920 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21921 || flag_stack_clash_protection))
21922 {
21923 unsigned int regno;
21924
21925 if (!IS_NESTED (func_type) || clobber_ip)
21926 regno = IP_REGNUM;
21927 else if (df_regs_ever_live_p (LR_REGNUM))
21928 regno = LR_REGNUM;
21929 else
21930 regno = 3;
21931
21932 if (crtl->is_leaf && !cfun->calls_alloca)
21933 {
21934 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21935 arm_emit_probe_stack_range (get_stack_check_protect (),
21936 size - get_stack_check_protect (),
21937 regno, live_regs_mask);
21938 }
21939 else if (size > 0)
21940 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21941 regno, live_regs_mask);
21942 }
21943
21944 /* Recover the static chain register. */
21945 if (clobber_ip)
21946 {
21947 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21948 insn = gen_rtx_REG (SImode, 3);
21949 else
21950 {
21951 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21952 insn = gen_frame_mem (SImode, insn);
21953 }
21954 emit_set_insn (ip_rtx, insn);
21955 emit_insn (gen_force_register_use (ip_rtx));
21956 }
21957
21958 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21959 {
21960 /* This add can produce multiple insns for a large constant, so we
21961 need to get tricky. */
21962 rtx_insn *last = get_last_insn ();
21963
21964 amount = GEN_INT (offsets->saved_args + saved_regs
21965 - offsets->outgoing_args);
21966
21967 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21968 amount));
21969 do
21970 {
21971 last = last ? NEXT_INSN (last) : get_insns ();
21972 RTX_FRAME_RELATED_P (last) = 1;
21973 }
21974 while (last != insn);
21975
21976 /* If the frame pointer is needed, emit a special barrier that
21977 will prevent the scheduler from moving stores to the frame
21978 before the stack adjustment. */
21979 if (frame_pointer_needed)
21980 emit_insn (gen_stack_tie (stack_pointer_rtx,
21981 hard_frame_pointer_rtx));
21982 }
21983
21984
21985 if (frame_pointer_needed && TARGET_THUMB2)
21986 thumb_set_frame_pointer (offsets);
21987
21988 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21989 {
21990 unsigned long mask;
21991
21992 mask = live_regs_mask;
21993 mask &= THUMB2_WORK_REGS;
21994 if (!IS_NESTED (func_type))
21995 mask |= (1 << IP_REGNUM);
21996 arm_load_pic_register (mask);
21997 }
21998
21999 /* If we are profiling, make sure no instructions are scheduled before
22000 the call to mcount. Similarly if the user has requested no
22001 scheduling in the prolog. Similarly if we want non-call exceptions
22002 using the EABI unwinder, to prevent faulting instructions from being
22003 swapped with a stack adjustment. */
22004 if (crtl->profile || !TARGET_SCHED_PROLOG
22005 || (arm_except_unwind_info (&global_options) == UI_TARGET
22006 && cfun->can_throw_non_call_exceptions))
22007 emit_insn (gen_blockage ());
22008
22009 /* If the link register is being kept alive, with the return address in it,
22010 then make sure that it does not get reused by the ce2 pass. */
22011 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22012 cfun->machine->lr_save_eliminated = 1;
22013 }
22014 \f
22015 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22016 static void
22017 arm_print_condition (FILE *stream)
22018 {
22019 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22020 {
22021 /* Branch conversion is not implemented for Thumb-2. */
22022 if (TARGET_THUMB)
22023 {
22024 output_operand_lossage ("predicated Thumb instruction");
22025 return;
22026 }
22027 if (current_insn_predicate != NULL)
22028 {
22029 output_operand_lossage
22030 ("predicated instruction in conditional sequence");
22031 return;
22032 }
22033
22034 fputs (arm_condition_codes[arm_current_cc], stream);
22035 }
22036 else if (current_insn_predicate)
22037 {
22038 enum arm_cond_code code;
22039
22040 if (TARGET_THUMB1)
22041 {
22042 output_operand_lossage ("predicated Thumb instruction");
22043 return;
22044 }
22045
22046 code = get_arm_condition_code (current_insn_predicate);
22047 fputs (arm_condition_codes[code], stream);
22048 }
22049 }
22050
22051
22052 /* Globally reserved letters: acln
22053 Punctuation letters currently used: @_|?().!#
22054 Lower case letters currently used: bcdefhimpqtvwxyz
22055 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22056 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22057
22058 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22059
22060 If CODE is 'd', then the X is a condition operand and the instruction
22061 should only be executed if the condition is true.
22062 If CODE is 'D', then the X is a condition operand and the instruction
22063 should only be executed if the condition is false: however, if the mode
22064 of the comparison is CCFPEmode, then always execute the instruction -- we
22065 do this because in these circumstances !GE does not necessarily imply LT;
22066 in these cases the instruction pattern will take care to make sure that
22067 an instruction containing %d will follow, thereby undoing the effects of
22068 doing this instruction unconditionally.
22069 If CODE is 'N' then X is a floating point operand that must be negated
22070 before output.
22071 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22072 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
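/* Example (hypothetical template, for illustration only): in an output
   template such as "add%.\t%0, %1, %2" the '.' code prints the 's' flag
   plus the current condition, "%?" alone prints just the condition for a
   predicated instruction, and "%B2" prints the bitwise inverse of a
   CONST_INT operand 2.  */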
22073 static void
22074 arm_print_operand (FILE *stream, rtx x, int code)
22075 {
22076 switch (code)
22077 {
22078 case '@':
22079 fputs (ASM_COMMENT_START, stream);
22080 return;
22081
22082 case '_':
22083 fputs (user_label_prefix, stream);
22084 return;
22085
22086 case '|':
22087 fputs (REGISTER_PREFIX, stream);
22088 return;
22089
22090 case '?':
22091 arm_print_condition (stream);
22092 return;
22093
22094 case '.':
22095 /* The current condition code for a condition code setting instruction.
22096 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22097 fputc ('s', stream);
22098 arm_print_condition (stream);
22099 return;
22100
22101 case '!':
22102 /* If the instruction is conditionally executed then print
22103 the current condition code, otherwise print 's'. */
22104 gcc_assert (TARGET_THUMB2);
22105 if (current_insn_predicate)
22106 arm_print_condition (stream);
22107 else
22108 fputc ('s', stream);
22109 break;
22110
22111 /* %# is a "break" sequence. It doesn't output anything, but is used to
22112 separate e.g. operand numbers from following text, if that text consists
22113 of further digits which we don't want to be part of the operand
22114 number. */
22115 case '#':
22116 return;
22117
22118 case 'N':
22119 {
22120 REAL_VALUE_TYPE r;
22121 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22122 fprintf (stream, "%s", fp_const_from_val (&r));
22123 }
22124 return;
22125
22126 /* An integer or symbol address without a preceding # sign. */
22127 case 'c':
22128 switch (GET_CODE (x))
22129 {
22130 case CONST_INT:
22131 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22132 break;
22133
22134 case SYMBOL_REF:
22135 output_addr_const (stream, x);
22136 break;
22137
22138 case CONST:
22139 if (GET_CODE (XEXP (x, 0)) == PLUS
22140 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22141 {
22142 output_addr_const (stream, x);
22143 break;
22144 }
22145 /* Fall through. */
22146
22147 default:
22148 output_operand_lossage ("Unsupported operand for code '%c'", code);
22149 }
22150 return;
22151
22152 /* An integer that we want to print in HEX. */
22153 case 'x':
22154 switch (GET_CODE (x))
22155 {
22156 case CONST_INT:
22157 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22158 break;
22159
22160 default:
22161 output_operand_lossage ("Unsupported operand for code '%c'", code);
22162 }
22163 return;
22164
22165 case 'B':
22166 if (CONST_INT_P (x))
22167 {
22168 HOST_WIDE_INT val;
22169 val = ARM_SIGN_EXTEND (~INTVAL (x));
22170 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22171 }
22172 else
22173 {
22174 putc ('~', stream);
22175 output_addr_const (stream, x);
22176 }
22177 return;
22178
22179 case 'b':
22180 /* Print the log2 of a CONST_INT. */
22181 {
22182 HOST_WIDE_INT val;
22183
22184 if (!CONST_INT_P (x)
22185 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22186 output_operand_lossage ("Unsupported operand for code '%c'", code);
22187 else
22188 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22189 }
22190 return;
22191
22192 case 'L':
22193 /* The low 16 bits of an immediate constant. */
22194 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
22195 return;
22196
22197 case 'i':
22198 fprintf (stream, "%s", arithmetic_instr (x, 1));
22199 return;
22200
22201 case 'I':
22202 fprintf (stream, "%s", arithmetic_instr (x, 0));
22203 return;
22204
22205 case 'S':
22206 {
22207 HOST_WIDE_INT val;
22208 const char *shift;
22209
22210 shift = shift_op (x, &val);
22211
22212 if (shift)
22213 {
22214 fprintf (stream, ", %s ", shift);
22215 if (val == -1)
22216 arm_print_operand (stream, XEXP (x, 1), 0);
22217 else
22218 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22219 }
22220 }
22221 return;
22222
22223 /* An explanation of the 'Q', 'R' and 'H' register operands:
22224
22225 In a pair of registers containing a DI or DF value the 'Q'
22226 operand returns the register number of the register containing
22227 the least significant part of the value. The 'R' operand returns
22228 the register number of the register containing the most
22229 significant part of the value.
22230
22231 The 'H' operand returns the higher of the two register numbers.
22232 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
22233 same as the 'Q' operand, since the most significant part of the
22234 value is held in the lower number register. The reverse is true
22235 on systems where WORDS_BIG_ENDIAN is false.
22236
22237 The purpose of these operands is to distinguish between cases
22238 where the endian-ness of the values is important (for example
22239 when they are added together), and cases where the endian-ness
22240 is irrelevant, but the order of register operations is important.
22241 For example when loading a value from memory into a register
22242 pair, the endian-ness does not matter. Provided that the value
22243 from the lower memory address is put into the lower numbered
22244 register, and the value from the higher address is put into the
22245 higher numbered register, the load will work regardless of whether
22246 the value being loaded is big-wordian or little-wordian. The
22247 order of the two register loads can matter however, if the address
22248 of the memory location is actually held in one of the registers
22249 being overwritten by the load.
22250
22251 The 'Q' and 'R' constraints are also available for 64-bit
22252 constants. */
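    /* Concrete example (little-endian, WORDS_BIG_ENDIAN false): for a
       DImode value held in {r2, r3}, '%Q' prints r2 (the least significant
       half), '%R' prints r3 (the most significant half) and '%H' prints r3,
       the higher-numbered register of the pair.  */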
22253 case 'Q':
22254 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22255 {
22256 rtx part = gen_lowpart (SImode, x);
22257 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22258 return;
22259 }
22260
22261 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22262 {
22263 output_operand_lossage ("invalid operand for code '%c'", code);
22264 return;
22265 }
22266
22267 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22268 return;
22269
22270 case 'R':
22271 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22272 {
22273 machine_mode mode = GET_MODE (x);
22274 rtx part;
22275
22276 if (mode == VOIDmode)
22277 mode = DImode;
22278 part = gen_highpart_mode (SImode, mode, x);
22279 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22280 return;
22281 }
22282
22283 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22284 {
22285 output_operand_lossage ("invalid operand for code '%c'", code);
22286 return;
22287 }
22288
22289 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22290 return;
22291
22292 case 'H':
22293 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22294 {
22295 output_operand_lossage ("invalid operand for code '%c'", code);
22296 return;
22297 }
22298
22299 asm_fprintf (stream, "%r", REGNO (x) + 1);
22300 return;
22301
22302 case 'J':
22303 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22304 {
22305 output_operand_lossage ("invalid operand for code '%c'", code);
22306 return;
22307 }
22308
22309 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22310 return;
22311
22312 case 'K':
22313 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22314 {
22315 output_operand_lossage ("invalid operand for code '%c'", code);
22316 return;
22317 }
22318
22319 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22320 return;
22321
22322 case 'm':
22323 asm_fprintf (stream, "%r",
22324 REG_P (XEXP (x, 0))
22325 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22326 return;
22327
22328 case 'M':
22329 asm_fprintf (stream, "{%r-%r}",
22330 REGNO (x),
22331 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22332 return;
22333
22334 /* Like 'M', but writing doubleword vector registers, for use by Neon
22335 insns. */
22336 case 'h':
22337 {
22338 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22339 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22340 if (numregs == 1)
22341 asm_fprintf (stream, "{d%d}", regno);
22342 else
22343 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22344 }
22345 return;
22346
22347 case 'd':
22348 /* CONST_TRUE_RTX means always -- that's the default. */
22349 if (x == const_true_rtx)
22350 return;
22351
22352 if (!COMPARISON_P (x))
22353 {
22354 output_operand_lossage ("invalid operand for code '%c'", code);
22355 return;
22356 }
22357
22358 fputs (arm_condition_codes[get_arm_condition_code (x)],
22359 stream);
22360 return;
22361
22362 case 'D':
22363 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22364 want to do that. */
22365 if (x == const_true_rtx)
22366 {
22367 output_operand_lossage ("instruction never executed");
22368 return;
22369 }
22370 if (!COMPARISON_P (x))
22371 {
22372 output_operand_lossage ("invalid operand for code '%c'", code);
22373 return;
22374 }
22375
22376 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22377 (get_arm_condition_code (x))],
22378 stream);
22379 return;
22380
22381 case 's':
22382 case 'V':
22383 case 'W':
22384 case 'X':
22385 case 'Y':
22386 case 'Z':
22387 /* Former Maverick support, removed after GCC-4.7. */
22388 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22389 return;
22390
22391 case 'U':
22392 if (!REG_P (x)
22393 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22394 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22395 /* Bad value for wCG register number. */
22396 {
22397 output_operand_lossage ("invalid operand for code '%c'", code);
22398 return;
22399 }
22400
22401 else
22402 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22403 return;
22404
22405 /* Print an iWMMXt control register name. */
22406 case 'w':
22407 if (!CONST_INT_P (x)
22408 || INTVAL (x) < 0
22409 || INTVAL (x) >= 16)
22410 /* Bad value for wC register number. */
22411 {
22412 output_operand_lossage ("invalid operand for code '%c'", code);
22413 return;
22414 }
22415
22416 else
22417 {
22418 static const char * wc_reg_names [16] =
22419 {
22420 "wCID", "wCon", "wCSSF", "wCASF",
22421 "wC4", "wC5", "wC6", "wC7",
22422 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22423 "wC12", "wC13", "wC14", "wC15"
22424 };
22425
22426 fputs (wc_reg_names [INTVAL (x)], stream);
22427 }
22428 return;
22429
22430 /* Print the high single-precision register of a VFP double-precision
22431 register. */
22432 case 'p':
22433 {
22434 machine_mode mode = GET_MODE (x);
22435 int regno;
22436
22437 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22438 {
22439 output_operand_lossage ("invalid operand for code '%c'", code);
22440 return;
22441 }
22442
22443 regno = REGNO (x);
22444 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22445 {
22446 output_operand_lossage ("invalid operand for code '%c'", code);
22447 return;
22448 }
22449
22450 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22451 }
22452 return;
22453
22454 /* Print a VFP/Neon double precision or quad precision register name. */
22455 case 'P':
22456 case 'q':
22457 {
22458 machine_mode mode = GET_MODE (x);
22459 int is_quad = (code == 'q');
22460 int regno;
22461
22462 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22463 {
22464 output_operand_lossage ("invalid operand for code '%c'", code);
22465 return;
22466 }
22467
22468 if (!REG_P (x)
22469 || !IS_VFP_REGNUM (REGNO (x)))
22470 {
22471 output_operand_lossage ("invalid operand for code '%c'", code);
22472 return;
22473 }
22474
22475 regno = REGNO (x);
22476 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22477 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22478 {
22479 output_operand_lossage ("invalid operand for code '%c'", code);
22480 return;
22481 }
22482
22483 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22484 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22485 }
22486 return;
22487
22488 /* These two codes print the low/high doubleword register of a Neon quad
22489 register, respectively. For pair-structure types, can also print
22490 low/high quadword registers. */
22491 case 'e':
22492 case 'f':
22493 {
22494 machine_mode mode = GET_MODE (x);
22495 int regno;
22496
22497 if ((GET_MODE_SIZE (mode) != 16
22498 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22499 {
22500 output_operand_lossage ("invalid operand for code '%c'", code);
22501 return;
22502 }
22503
22504 regno = REGNO (x);
22505 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22506 {
22507 output_operand_lossage ("invalid operand for code '%c'", code);
22508 return;
22509 }
22510
22511 if (GET_MODE_SIZE (mode) == 16)
22512 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22513 + (code == 'f' ? 1 : 0));
22514 else
22515 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22516 + (code == 'f' ? 1 : 0));
22517 }
22518 return;
22519
22520 /* Print a VFPv3 floating-point constant, represented as an integer
22521 index. */
22522 case 'G':
22523 {
22524 int index = vfp3_const_double_index (x);
22525 gcc_assert (index != -1);
22526 fprintf (stream, "%d", index);
22527 }
22528 return;
22529
22530 /* Print bits representing opcode features for Neon.
22531
22532 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22533 and polynomials as unsigned.
22534
22535 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22536
22537 Bit 2 is 1 for rounding functions, 0 otherwise. */
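    /* For example (illustrative): a bits value of 5 (binary 101) encodes a
       signed integer operation with rounding, so for that operand '%T'
       prints 's', '%F' prints 'i', '%t' prints 's' and '%O' prints 'r'.  */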
22538
22539 /* Identify the type as 's', 'u', 'p' or 'f'. */
22540 case 'T':
22541 {
22542 HOST_WIDE_INT bits = INTVAL (x);
22543 fputc ("uspf"[bits & 3], stream);
22544 }
22545 return;
22546
22547 /* Likewise, but signed and unsigned integers are both 'i'. */
22548 case 'F':
22549 {
22550 HOST_WIDE_INT bits = INTVAL (x);
22551 fputc ("iipf"[bits & 3], stream);
22552 }
22553 return;
22554
22555 /* As for 'T', but emit 'u' instead of 'p'. */
22556 case 't':
22557 {
22558 HOST_WIDE_INT bits = INTVAL (x);
22559 fputc ("usuf"[bits & 3], stream);
22560 }
22561 return;
22562
22563 /* Bit 2: rounding (vs none). */
22564 case 'O':
22565 {
22566 HOST_WIDE_INT bits = INTVAL (x);
22567 fputs ((bits & 4) != 0 ? "r" : "", stream);
22568 }
22569 return;
22570
22571 /* Memory operand for vld1/vst1 instruction. */
22572 case 'A':
22573 {
22574 rtx addr;
22575 bool postinc = FALSE;
22576 rtx postinc_reg = NULL;
22577 unsigned align, memsize, align_bits;
22578
22579 gcc_assert (MEM_P (x));
22580 addr = XEXP (x, 0);
22581 if (GET_CODE (addr) == POST_INC)
22582 {
22583 postinc = 1;
22584 addr = XEXP (addr, 0);
22585 }
22586 if (GET_CODE (addr) == POST_MODIFY)
22587 {
22588 postinc_reg = XEXP (XEXP (addr, 1), 1);
22589 addr = XEXP (addr, 0);
22590 }
22591 asm_fprintf (stream, "[%r", REGNO (addr));
22592
22593 /* We know the alignment of this access, so we can emit a hint in the
22594 instruction (for some alignments) as an aid to the memory subsystem
22595 of the target. */
22596 align = MEM_ALIGN (x) >> 3;
22597 memsize = MEM_SIZE (x);
22598
22599 /* Only certain alignment specifiers are supported by the hardware. */
22600 if (memsize == 32 && (align % 32) == 0)
22601 align_bits = 256;
22602 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22603 align_bits = 128;
22604 else if (memsize >= 8 && (align % 8) == 0)
22605 align_bits = 64;
22606 else
22607 align_bits = 0;
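	/* For example (illustrative): a 16-byte access known to be 16-byte
	   aligned yields align_bits == 128 and is printed as "[rN:128]",
	   passing the alignment hint on to the hardware.  */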
22608
22609 if (align_bits != 0)
22610 asm_fprintf (stream, ":%d", align_bits);
22611
22612 asm_fprintf (stream, "]");
22613
22614 if (postinc)
22615 fputs ("!", stream);
22616 if (postinc_reg)
22617 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22618 }
22619 return;
22620
22621 case 'C':
22622 {
22623 rtx addr;
22624
22625 gcc_assert (MEM_P (x));
22626 addr = XEXP (x, 0);
22627 gcc_assert (REG_P (addr));
22628 asm_fprintf (stream, "[%r]", REGNO (addr));
22629 }
22630 return;
22631
22632 /* Translate an S register number into a D register number and element index. */
22633 case 'y':
22634 {
22635 machine_mode mode = GET_MODE (x);
22636 int regno;
22637
22638 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22639 {
22640 output_operand_lossage ("invalid operand for code '%c'", code);
22641 return;
22642 }
22643
22644 regno = REGNO (x);
22645 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22646 {
22647 output_operand_lossage ("invalid operand for code '%c'", code);
22648 return;
22649 }
22650
22651 regno = regno - FIRST_VFP_REGNUM;
22652 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22653 }
22654 return;
22655
22656 case 'v':
22657 gcc_assert (CONST_DOUBLE_P (x));
22658 int result;
22659 result = vfp3_const_double_for_fract_bits (x);
22660 if (result == 0)
22661 result = vfp3_const_double_for_bits (x);
22662 fprintf (stream, "#%d", result);
22663 return;
22664
22665 /* Register specifier for vld1.16/vst1.16. Translate the S register
22666 number into a D register number and element index. */
22667 case 'z':
22668 {
22669 machine_mode mode = GET_MODE (x);
22670 int regno;
22671
22672 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22673 {
22674 output_operand_lossage ("invalid operand for code '%c'", code);
22675 return;
22676 }
22677
22678 regno = REGNO (x);
22679 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22680 {
22681 output_operand_lossage ("invalid operand for code '%c'", code);
22682 return;
22683 }
22684
22685 regno = regno - FIRST_VFP_REGNUM;
22686 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22687 }
22688 return;
22689
22690 default:
22691 if (x == 0)
22692 {
22693 output_operand_lossage ("missing operand");
22694 return;
22695 }
22696
22697 switch (GET_CODE (x))
22698 {
22699 case REG:
22700 asm_fprintf (stream, "%r", REGNO (x));
22701 break;
22702
22703 case MEM:
22704 output_address (GET_MODE (x), XEXP (x, 0));
22705 break;
22706
22707 case CONST_DOUBLE:
22708 {
22709 char fpstr[20];
22710 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22711 sizeof (fpstr), 0, 1);
22712 fprintf (stream, "#%s", fpstr);
22713 }
22714 break;
22715
22716 default:
22717 gcc_assert (GET_CODE (x) != NEG);
22718 fputc ('#', stream);
22719 if (GET_CODE (x) == HIGH)
22720 {
22721 fputs (":lower16:", stream);
22722 x = XEXP (x, 0);
22723 }
22724
22725 output_addr_const (stream, x);
22726 break;
22727 }
22728 }
22729 }
22730 \f
22731 /* Target hook for printing a memory address. */
22732 static void
22733 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22734 {
22735 if (TARGET_32BIT)
22736 {
22737 int is_minus = GET_CODE (x) == MINUS;
22738
22739 if (REG_P (x))
22740 asm_fprintf (stream, "[%r]", REGNO (x));
22741 else if (GET_CODE (x) == PLUS || is_minus)
22742 {
22743 rtx base = XEXP (x, 0);
22744 rtx index = XEXP (x, 1);
22745 HOST_WIDE_INT offset = 0;
22746 if (!REG_P (base)
22747 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22748 {
22749 /* Ensure that BASE is a register. */
22750 /* (one of them must be). */
22751 /* Also ensure that SP is not used as an index register. */
22752 std::swap (base, index);
22753 }
22754 switch (GET_CODE (index))
22755 {
22756 case CONST_INT:
22757 offset = INTVAL (index);
22758 if (is_minus)
22759 offset = -offset;
22760 asm_fprintf (stream, "[%r, #%wd]",
22761 REGNO (base), offset);
22762 break;
22763
22764 case REG:
22765 asm_fprintf (stream, "[%r, %s%r]",
22766 REGNO (base), is_minus ? "-" : "",
22767 REGNO (index));
22768 break;
22769
22770 case MULT:
22771 case ASHIFTRT:
22772 case LSHIFTRT:
22773 case ASHIFT:
22774 case ROTATERT:
22775 {
22776 asm_fprintf (stream, "[%r, %s%r",
22777 REGNO (base), is_minus ? "-" : "",
22778 REGNO (XEXP (index, 0)));
22779 arm_print_operand (stream, index, 'S');
22780 fputs ("]", stream);
22781 break;
22782 }
22783
22784 default:
22785 gcc_unreachable ();
22786 }
22787 }
22788 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22789 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22790 {
22791 gcc_assert (REG_P (XEXP (x, 0)));
22792
22793 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22794 asm_fprintf (stream, "[%r, #%s%d]!",
22795 REGNO (XEXP (x, 0)),
22796 GET_CODE (x) == PRE_DEC ? "-" : "",
22797 GET_MODE_SIZE (mode));
22798 else
22799 asm_fprintf (stream, "[%r], #%s%d",
22800 REGNO (XEXP (x, 0)),
22801 GET_CODE (x) == POST_DEC ? "-" : "",
22802 GET_MODE_SIZE (mode));
22803 }
22804 else if (GET_CODE (x) == PRE_MODIFY)
22805 {
22806 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22807 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22808 asm_fprintf (stream, "#%wd]!",
22809 INTVAL (XEXP (XEXP (x, 1), 1)));
22810 else
22811 asm_fprintf (stream, "%r]!",
22812 REGNO (XEXP (XEXP (x, 1), 1)));
22813 }
22814 else if (GET_CODE (x) == POST_MODIFY)
22815 {
22816 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22817 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22818 asm_fprintf (stream, "#%wd",
22819 INTVAL (XEXP (XEXP (x, 1), 1)));
22820 else
22821 asm_fprintf (stream, "%r",
22822 REGNO (XEXP (XEXP (x, 1), 1)));
22823 }
22824 else output_addr_const (stream, x);
22825 }
22826 else
22827 {
22828 if (REG_P (x))
22829 asm_fprintf (stream, "[%r]", REGNO (x));
22830 else if (GET_CODE (x) == POST_INC)
22831 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22832 else if (GET_CODE (x) == PLUS)
22833 {
22834 gcc_assert (REG_P (XEXP (x, 0)));
22835 if (CONST_INT_P (XEXP (x, 1)))
22836 asm_fprintf (stream, "[%r, #%wd]",
22837 REGNO (XEXP (x, 0)),
22838 INTVAL (XEXP (x, 1)));
22839 else
22840 asm_fprintf (stream, "[%r, %r]",
22841 REGNO (XEXP (x, 0)),
22842 REGNO (XEXP (x, 1)));
22843 }
22844 else
22845 output_addr_const (stream, x);
22846 }
22847 }
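/* Illustrative outputs of the routine above for TARGET_32BIT (r3 is just an
   example register):
     (reg r3)				-> [r3]
     (plus (reg r3) (const_int 8))	-> [r3, #8]
     (post_inc (reg r3)), SImode	-> [r3], #4
     (pre_dec (reg r3)), SImode		-> [r3, #-4]!  */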
22848 \f
22849 /* Target hook for indicating whether a punctuation character for
22850 TARGET_PRINT_OPERAND is valid. */
22851 static bool
22852 arm_print_operand_punct_valid_p (unsigned char code)
22853 {
22854 return (code == '@' || code == '|' || code == '.'
22855 || code == '(' || code == ')' || code == '#'
22856 || (TARGET_32BIT && (code == '?'))
22857 || (TARGET_THUMB2 && (code == '!'))
22858 || (TARGET_THUMB && (code == '_')));
22859 }
22860 \f
22861 /* Target hook for assembling integer objects. The ARM version needs to
22862 handle word-sized values specially. */
22863 static bool
22864 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22865 {
22866 machine_mode mode;
22867
22868 if (size == UNITS_PER_WORD && aligned_p)
22869 {
22870 fputs ("\t.word\t", asm_out_file);
22871 output_addr_const (asm_out_file, x);
22872
22873 /* Mark symbols as position independent. We only do this in the
22874 .text segment, not in the .data segment. */
22875 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22876 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22877 {
22878 /* See legitimize_pic_address for an explanation of the
22879 TARGET_VXWORKS_RTP check. */
22880 /* References to weak symbols cannot be resolved locally:
22881 they may be overridden by a non-weak definition at link
22882 time. */
22883 if (!arm_pic_data_is_text_relative
22884 || (GET_CODE (x) == SYMBOL_REF
22885 && (!SYMBOL_REF_LOCAL_P (x)
22886 || (SYMBOL_REF_DECL (x)
22887 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22888 fputs ("(GOT)", asm_out_file);
22889 else
22890 fputs ("(GOTOFF)", asm_out_file);
22891 }
22892 fputc ('\n', asm_out_file);
22893 return true;
22894 }
22895
22896 mode = GET_MODE (x);
22897
22898 if (arm_vector_mode_supported_p (mode))
22899 {
22900 int i, units;
22901
22902 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22903
22904 units = CONST_VECTOR_NUNITS (x);
22905 size = GET_MODE_UNIT_SIZE (mode);
22906
22907 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22908 for (i = 0; i < units; i++)
22909 {
22910 rtx elt = CONST_VECTOR_ELT (x, i);
22911 assemble_integer
22912 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22913 }
22914 else
22915 for (i = 0; i < units; i++)
22916 {
22917 rtx elt = CONST_VECTOR_ELT (x, i);
22918 assemble_real
22919 (*CONST_DOUBLE_REAL_VALUE (elt),
22920 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22921 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22922 }
22923
22924 return true;
22925 }
22926
22927 return default_assemble_integer (x, size, aligned_p);
22928 }
22929
22930 static void
22931 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22932 {
22933 section *s;
22934
22935 if (!TARGET_AAPCS_BASED)
22936 {
22937 (is_ctor ?
22938 default_named_section_asm_out_constructor
22939 : default_named_section_asm_out_destructor) (symbol, priority);
22940 return;
22941 }
22942
22943 /* Put these in the .init_array section, using a special relocation. */
22944 if (priority != DEFAULT_INIT_PRIORITY)
22945 {
22946 char buf[18];
22947 sprintf (buf, "%s.%.5u",
22948 is_ctor ? ".init_array" : ".fini_array",
22949 priority);
22950 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22951 }
22952 else if (is_ctor)
22953 s = ctors_section;
22954 else
22955 s = dtors_section;
22956
22957 switch_to_section (s);
22958 assemble_align (POINTER_SIZE);
22959 fputs ("\t.word\t", asm_out_file);
22960 output_addr_const (asm_out_file, symbol);
22961 fputs ("(target1)\n", asm_out_file);
22962 }
22963
22964 /* Add a function to the list of static constructors. */
22965
22966 static void
22967 arm_elf_asm_constructor (rtx symbol, int priority)
22968 {
22969 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22970 }
22971
22972 /* Add a function to the list of static destructors. */
22973
22974 static void
22975 arm_elf_asm_destructor (rtx symbol, int priority)
22976 {
22977 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22978 }
22979 \f
22980 /* A finite state machine takes care of noticing whether or not instructions
22981 can be conditionally executed, and thus decrease execution time and code
22982 size by deleting branch instructions. The fsm is controlled by
22983 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22984
22985 /* The state of the fsm controlling condition codes are:
22986 0: normal, do nothing special
22987 1: make ASM_OUTPUT_OPCODE not output this instruction
22988 2: make ASM_OUTPUT_OPCODE not output this instruction
22989 3: make instructions conditional
22990 4: make instructions conditional
22991
22992 State transitions (state->state by whom under condition):
22993 0 -> 1 final_prescan_insn if the `target' is a label
22994 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22995 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22996 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22997 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22998 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22999 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23000 (the target insn is arm_target_insn).
23001
23002 If the jump clobbers the conditions then we use states 2 and 4.
23003
23004 A similar thing can be done with conditional return insns.
23005
23006 XXX In case the `target' is an unconditional branch, this conditionalising
23007 of the instructions always reduces code size, but not always execution
23008 time. But then, I want to reduce the code size to somewhere near what
23009 /bin/cc produces. */
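/* Illustrative example (ARM state): a branch-over sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch, provided the skipped insns can all be
   conditionalised and fit within max_insns_skipped.  */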
23010
23011 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23012 instructions. When a COND_EXEC instruction is seen the subsequent
23013 instructions are scanned so that multiple conditional instructions can be
23014 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23015 specify the length and true/false mask for the IT block. These will be
23016 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
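/* For example (illustrative, Thumb-2): a COND_EXEC insn under EQ followed by
   one under NE can be emitted as a single IT block

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   rather than as two separate conditional sequences.  */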
23017
23018 /* Returns the index of the ARM condition code string in
23019 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23020 COMPARISON should be an rtx like `(eq (...) (...))'. */
23021
23022 enum arm_cond_code
23023 maybe_get_arm_condition_code (rtx comparison)
23024 {
23025 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23026 enum arm_cond_code code;
23027 enum rtx_code comp_code = GET_CODE (comparison);
23028
23029 if (GET_MODE_CLASS (mode) != MODE_CC)
23030 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23031 XEXP (comparison, 1));
23032
23033 switch (mode)
23034 {
23035 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23036 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23037 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23038 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23039 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23040 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23041 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23042 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23043 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23044 case E_CC_DLTUmode: code = ARM_CC;
23045
23046 dominance:
23047 if (comp_code == EQ)
23048 return ARM_INVERSE_CONDITION_CODE (code);
23049 if (comp_code == NE)
23050 return code;
23051 return ARM_NV;
23052
23053 case E_CC_NOOVmode:
23054 switch (comp_code)
23055 {
23056 case NE: return ARM_NE;
23057 case EQ: return ARM_EQ;
23058 case GE: return ARM_PL;
23059 case LT: return ARM_MI;
23060 default: return ARM_NV;
23061 }
23062
23063 case E_CC_Zmode:
23064 switch (comp_code)
23065 {
23066 case NE: return ARM_NE;
23067 case EQ: return ARM_EQ;
23068 default: return ARM_NV;
23069 }
23070
23071 case E_CC_Nmode:
23072 switch (comp_code)
23073 {
23074 case NE: return ARM_MI;
23075 case EQ: return ARM_PL;
23076 default: return ARM_NV;
23077 }
23078
23079 case E_CCFPEmode:
23080 case E_CCFPmode:
23081 /* We can handle all cases except UNEQ and LTGT. */
23082 switch (comp_code)
23083 {
23084 case GE: return ARM_GE;
23085 case GT: return ARM_GT;
23086 case LE: return ARM_LS;
23087 case LT: return ARM_MI;
23088 case NE: return ARM_NE;
23089 case EQ: return ARM_EQ;
23090 case ORDERED: return ARM_VC;
23091 case UNORDERED: return ARM_VS;
23092 case UNLT: return ARM_LT;
23093 case UNLE: return ARM_LE;
23094 case UNGT: return ARM_HI;
23095 case UNGE: return ARM_PL;
23096 /* UNEQ and LTGT do not have a representation. */
23097 case UNEQ: /* Fall through. */
23098 case LTGT: /* Fall through. */
23099 default: return ARM_NV;
23100 }
23101
23102 case E_CC_SWPmode:
23103 switch (comp_code)
23104 {
23105 case NE: return ARM_NE;
23106 case EQ: return ARM_EQ;
23107 case GE: return ARM_LE;
23108 case GT: return ARM_LT;
23109 case LE: return ARM_GE;
23110 case LT: return ARM_GT;
23111 case GEU: return ARM_LS;
23112 case GTU: return ARM_CC;
23113 case LEU: return ARM_CS;
23114 case LTU: return ARM_HI;
23115 default: return ARM_NV;
23116 }
23117
23118 case E_CC_Cmode:
23119 switch (comp_code)
23120 {
23121 case LTU: return ARM_CS;
23122 case GEU: return ARM_CC;
23123 case NE: return ARM_CS;
23124 case EQ: return ARM_CC;
23125 default: return ARM_NV;
23126 }
23127
23128 case E_CC_CZmode:
23129 switch (comp_code)
23130 {
23131 case NE: return ARM_NE;
23132 case EQ: return ARM_EQ;
23133 case GEU: return ARM_CS;
23134 case GTU: return ARM_HI;
23135 case LEU: return ARM_LS;
23136 case LTU: return ARM_CC;
23137 default: return ARM_NV;
23138 }
23139
23140 case E_CC_NCVmode:
23141 switch (comp_code)
23142 {
23143 case GE: return ARM_GE;
23144 case LT: return ARM_LT;
23145 case GEU: return ARM_CS;
23146 case LTU: return ARM_CC;
23147 default: return ARM_NV;
23148 }
23149
23150 case E_CC_Vmode:
23151 switch (comp_code)
23152 {
23153 case NE: return ARM_VS;
23154 case EQ: return ARM_VC;
23155 default: return ARM_NV;
23156 }
23157
23158 case E_CCmode:
23159 switch (comp_code)
23160 {
23161 case NE: return ARM_NE;
23162 case EQ: return ARM_EQ;
23163 case GE: return ARM_GE;
23164 case GT: return ARM_GT;
23165 case LE: return ARM_LE;
23166 case LT: return ARM_LT;
23167 case GEU: return ARM_CS;
23168 case GTU: return ARM_HI;
23169 case LEU: return ARM_LS;
23170 case LTU: return ARM_CC;
23171 default: return ARM_NV;
23172 }
23173
23174 default: gcc_unreachable ();
23175 }
23176 }
23177
23178 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23179 static enum arm_cond_code
23180 get_arm_condition_code (rtx comparison)
23181 {
23182 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23183 gcc_assert (code != ARM_NV);
23184 return code;
23185 }
23186
23187 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23188 code registers when not targeting Thumb1. The VFP condition register
23189 only exists when generating hard-float code. */
23190 static bool
23191 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23192 {
23193 if (!TARGET_32BIT)
23194 return false;
23195
23196 *p1 = CC_REGNUM;
23197 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23198 return true;
23199 }
23200
23201 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23202 instructions. */
23203 void
23204 thumb2_final_prescan_insn (rtx_insn *insn)
23205 {
23206 rtx_insn *first_insn = insn;
23207 rtx body = PATTERN (insn);
23208 rtx predicate;
23209 enum arm_cond_code code;
23210 int n;
23211 int mask;
23212 int max;
23213
23214 /* max_insns_skipped in the tune was already taken into account in the
23215 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23216 just emit the IT blocks as best we can. It does not make sense to split
23217 the IT blocks. */
23218 max = MAX_INSN_PER_IT_BLOCK;
23219
23220 /* Remove the previous insn from the count of insns to be output. */
23221 if (arm_condexec_count)
23222 arm_condexec_count--;
23223
23224 /* Nothing to do if we are already inside a conditional block. */
23225 if (arm_condexec_count)
23226 return;
23227
23228 if (GET_CODE (body) != COND_EXEC)
23229 return;
23230
23231 /* Conditional jumps are implemented directly. */
23232 if (JUMP_P (insn))
23233 return;
23234
23235 predicate = COND_EXEC_TEST (body);
23236 arm_current_cc = get_arm_condition_code (predicate);
23237
23238 n = get_attr_ce_count (insn);
23239 arm_condexec_count = 1;
23240 arm_condexec_mask = (1 << n) - 1;
23241 arm_condexec_masklen = n;
23242 /* See if subsequent instructions can be combined into the same block. */
23243 for (;;)
23244 {
23245 insn = next_nonnote_insn (insn);
23246
23247 /* Jumping into the middle of an IT block is illegal, so a label or
23248 barrier terminates the block. */
23249 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23250 break;
23251
23252 body = PATTERN (insn);
23253 /* USE and CLOBBER aren't really insns, so just skip them. */
23254 if (GET_CODE (body) == USE
23255 || GET_CODE (body) == CLOBBER)
23256 continue;
23257
23258 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23259 if (GET_CODE (body) != COND_EXEC)
23260 break;
23261 /* Maximum number of conditionally executed instructions in a block. */
23262 n = get_attr_ce_count (insn);
23263 if (arm_condexec_masklen + n > max)
23264 break;
23265
23266 predicate = COND_EXEC_TEST (body);
23267 code = get_arm_condition_code (predicate);
23268 mask = (1 << n) - 1;
23269 if (arm_current_cc == code)
23270 arm_condexec_mask |= (mask << arm_condexec_masklen);
23271 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23272 break;
23273
23274 arm_condexec_count++;
23275 arm_condexec_masklen += n;
23276
23277 /* A jump must be the last instruction in a conditional block. */
23278 if (JUMP_P (insn))
23279 break;
23280 }
23281 /* Restore recog_data (getting the attributes of other insns can
23282 destroy this array, but final.c assumes that it remains intact
23283 across this call). */
23284 extract_constrain_insn_cached (first_insn);
23285 }
23286
23287 void
23288 arm_final_prescan_insn (rtx_insn *insn)
23289 {
23290 /* BODY will hold the body of INSN. */
23291 rtx body = PATTERN (insn);
23292
23293 /* This will be 1 if trying to repeat the trick, and things need to be
23294 reversed if it appears to fail. */
23295 int reverse = 0;
23296
23297 /* If we start with a return insn, we only succeed if we find another one. */
23298 int seeking_return = 0;
23299 enum rtx_code return_code = UNKNOWN;
23300
23301 /* START_INSN will hold the insn from where we start looking. This is the
23302 first insn after the following code_label if REVERSE is true. */
23303 rtx_insn *start_insn = insn;
23304
23305 /* If in state 4, check if the target branch is reached, in order to
23306 change back to state 0. */
23307 if (arm_ccfsm_state == 4)
23308 {
23309 if (insn == arm_target_insn)
23310 {
23311 arm_target_insn = NULL;
23312 arm_ccfsm_state = 0;
23313 }
23314 return;
23315 }
23316
23317 /* If in state 3, it is possible to repeat the trick, if this insn is an
23318 unconditional branch to a label, and immediately following this branch
23319 is the previous target label which is only used once, and the label this
23320 branch jumps to is not too far off. */
23321 if (arm_ccfsm_state == 3)
23322 {
23323 if (simplejump_p (insn))
23324 {
23325 start_insn = next_nonnote_insn (start_insn);
23326 if (BARRIER_P (start_insn))
23327 {
23328 /* XXX Isn't this always a barrier? */
23329 start_insn = next_nonnote_insn (start_insn);
23330 }
23331 if (LABEL_P (start_insn)
23332 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23333 && LABEL_NUSES (start_insn) == 1)
23334 reverse = TRUE;
23335 else
23336 return;
23337 }
23338 else if (ANY_RETURN_P (body))
23339 {
23340 start_insn = next_nonnote_insn (start_insn);
23341 if (BARRIER_P (start_insn))
23342 start_insn = next_nonnote_insn (start_insn);
23343 if (LABEL_P (start_insn)
23344 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23345 && LABEL_NUSES (start_insn) == 1)
23346 {
23347 reverse = TRUE;
23348 seeking_return = 1;
23349 return_code = GET_CODE (body);
23350 }
23351 else
23352 return;
23353 }
23354 else
23355 return;
23356 }
23357
23358 gcc_assert (!arm_ccfsm_state || reverse);
23359 if (!JUMP_P (insn))
23360 return;
23361
23362 /* This jump might be paralleled with a clobber of the condition codes;
23363 the jump should always come first.  */
23364 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23365 body = XVECEXP (body, 0, 0);
23366
23367 if (reverse
23368 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23369 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23370 {
23371 int insns_skipped;
23372 int fail = FALSE, succeed = FALSE;
23373 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23374 int then_not_else = TRUE;
23375 rtx_insn *this_insn = start_insn;
23376 rtx label = 0;
23377
23378 /* Register the insn jumped to. */
23379 if (reverse)
23380 {
23381 if (!seeking_return)
23382 label = XEXP (SET_SRC (body), 0);
23383 }
23384 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23385 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23386 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23387 {
23388 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23389 then_not_else = FALSE;
23390 }
23391 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23392 {
23393 seeking_return = 1;
23394 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23395 }
23396 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23397 {
23398 seeking_return = 1;
23399 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23400 then_not_else = FALSE;
23401 }
23402 else
23403 gcc_unreachable ();
23404
23405 /* See how many insns this branch skips, and what kind of insns. If all
23406 insns are okay, and the label or unconditional branch to the same
23407 label is not too far away, succeed. */
23408 for (insns_skipped = 0;
23409 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23410 {
23411 rtx scanbody;
23412
23413 this_insn = next_nonnote_insn (this_insn);
23414 if (!this_insn)
23415 break;
23416
23417 switch (GET_CODE (this_insn))
23418 {
23419 case CODE_LABEL:
23420 /* Succeed if it is the target label, otherwise fail since
23421 control falls in from somewhere else. */
23422 if (this_insn == label)
23423 {
23424 arm_ccfsm_state = 1;
23425 succeed = TRUE;
23426 }
23427 else
23428 fail = TRUE;
23429 break;
23430
23431 case BARRIER:
23432 /* Succeed if the following insn is the target label.
23433 Otherwise fail.
23434 If return insns are used then the last insn in a function
23435 will be a barrier. */
23436 this_insn = next_nonnote_insn (this_insn);
23437 if (this_insn && this_insn == label)
23438 {
23439 arm_ccfsm_state = 1;
23440 succeed = TRUE;
23441 }
23442 else
23443 fail = TRUE;
23444 break;
23445
23446 case CALL_INSN:
23447 /* The AAPCS says that conditional calls should not be
23448 used since they make interworking inefficient (the
23449 linker can't transform BL<cond> into BLX). That's
23450 only a problem if the machine has BLX. */
23451 if (arm_arch5t)
23452 {
23453 fail = TRUE;
23454 break;
23455 }
23456
23457 /* Succeed if the following insn is the target label, or
23458 if the following two insns are a barrier and the
23459 target label. */
23460 this_insn = next_nonnote_insn (this_insn);
23461 if (this_insn && BARRIER_P (this_insn))
23462 this_insn = next_nonnote_insn (this_insn);
23463
23464 if (this_insn && this_insn == label
23465 && insns_skipped < max_insns_skipped)
23466 {
23467 arm_ccfsm_state = 1;
23468 succeed = TRUE;
23469 }
23470 else
23471 fail = TRUE;
23472 break;
23473
23474 case JUMP_INSN:
23475 /* If this is an unconditional branch to the same label, succeed.
23476 If it is to another label, do nothing. If it is conditional,
23477 fail. */
23478 /* XXX Probably, the tests for SET and the PC are
23479 unnecessary. */
23480
23481 scanbody = PATTERN (this_insn);
23482 if (GET_CODE (scanbody) == SET
23483 && GET_CODE (SET_DEST (scanbody)) == PC)
23484 {
23485 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23486 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23487 {
23488 arm_ccfsm_state = 2;
23489 succeed = TRUE;
23490 }
23491 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23492 fail = TRUE;
23493 }
23494 /* Fail if a conditional return is undesirable (e.g. on a
23495 StrongARM), but still allow this if optimizing for size. */
23496 else if (GET_CODE (scanbody) == return_code
23497 && !use_return_insn (TRUE, NULL)
23498 && !optimize_size)
23499 fail = TRUE;
23500 else if (GET_CODE (scanbody) == return_code)
23501 {
23502 arm_ccfsm_state = 2;
23503 succeed = TRUE;
23504 }
23505 else if (GET_CODE (scanbody) == PARALLEL)
23506 {
23507 switch (get_attr_conds (this_insn))
23508 {
23509 case CONDS_NOCOND:
23510 break;
23511 default:
23512 fail = TRUE;
23513 break;
23514 }
23515 }
23516 else
23517 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23518
23519 break;
23520
23521 case INSN:
23522 /* Instructions using or affecting the condition codes make it
23523 fail. */
23524 scanbody = PATTERN (this_insn);
23525 if (!(GET_CODE (scanbody) == SET
23526 || GET_CODE (scanbody) == PARALLEL)
23527 || get_attr_conds (this_insn) != CONDS_NOCOND)
23528 fail = TRUE;
23529 break;
23530
23531 default:
23532 break;
23533 }
23534 }
23535 if (succeed)
23536 {
23537 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23538 arm_target_label = CODE_LABEL_NUMBER (label);
23539 else
23540 {
23541 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23542
23543 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23544 {
23545 this_insn = next_nonnote_insn (this_insn);
23546 gcc_assert (!this_insn
23547 || (!BARRIER_P (this_insn)
23548 && !LABEL_P (this_insn)));
23549 }
23550 if (!this_insn)
23551 {
23552 /* Oh dear!  We ran off the end; give up.  */
23553 extract_constrain_insn_cached (insn);
23554 arm_ccfsm_state = 0;
23555 arm_target_insn = NULL;
23556 return;
23557 }
23558 arm_target_insn = this_insn;
23559 }
23560
23561 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23562 what it was. */
23563 if (!reverse)
23564 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23565
23566 if (reverse || then_not_else)
23567 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23568 }
23569
23570 /* Restore recog_data (getting the attributes of other insns can
23571 destroy this array, but final.c assumes that it remains intact
23572 across this call). */
23573 extract_constrain_insn_cached (insn);
23574 }
23575 }
23576
23577 /* Output IT instructions. */
23578 void
23579 thumb2_asm_output_opcode (FILE * stream)
23580 {
23581 char buff[5];
23582 int n;
23583
23584 if (arm_condexec_mask)
23585 {
23586 for (n = 0; n < arm_condexec_masklen; n++)
23587 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23588 buff[n] = 0;
23589 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23590 arm_condition_codes[arm_current_cc]);
23591 arm_condexec_mask = 0;
23592 }
23593 }
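
/* Example (added for illustration): with arm_current_cc == ARM_EQ,
   arm_condexec_mask == 0x3 and arm_condexec_masklen == 3, the loop above
   builds buff == "tte" and the function emits "itte  eq" ahead of the first
   instruction of the block; arm_condexec_mask is then cleared so the IT
   prefix is printed only once per block.  */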
23594
23595 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23596 UNITS_PER_WORD bytes wide. */
23597 static unsigned int
23598 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23599 {
23600 if (TARGET_32BIT
23601 && regno > PC_REGNUM
23602 && regno != FRAME_POINTER_REGNUM
23603 && regno != ARG_POINTER_REGNUM
23604 && !IS_VFP_REGNUM (regno))
23605 return 1;
23606
23607 return ARM_NUM_REGS (mode);
23608 }
23609
23610 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23611 static bool
23612 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23613 {
23614 if (GET_MODE_CLASS (mode) == MODE_CC)
23615 return (regno == CC_REGNUM
23616 || (TARGET_HARD_FLOAT
23617 && regno == VFPCC_REGNUM));
23618
23619 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23620 return false;
23621
23622 if (TARGET_THUMB1)
23623 /* For the Thumb we only allow values bigger than SImode in
23624 registers 0 - 6, so that there is always a second low
23625 register available to hold the upper part of the value.
23626 We probably ought to ensure that the register is the
23627 start of an even numbered register pair. */
23628 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23629
23630 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23631 {
23632 if (mode == SFmode || mode == SImode)
23633 return VFP_REGNO_OK_FOR_SINGLE (regno);
23634
23635 if (mode == DFmode)
23636 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23637
23638 if (mode == HFmode)
23639 return VFP_REGNO_OK_FOR_SINGLE (regno);
23640
23641 /* VFP registers can hold HImode values. */
23642 if (mode == HImode)
23643 return VFP_REGNO_OK_FOR_SINGLE (regno);
23644
23645 if (TARGET_NEON)
23646 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23647 || (VALID_NEON_QREG_MODE (mode)
23648 && NEON_REGNO_OK_FOR_QUAD (regno))
23649 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23650 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23651 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23652 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23653 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23654
23655 return false;
23656 }
23657
23658 if (TARGET_REALLY_IWMMXT)
23659 {
23660 if (IS_IWMMXT_GR_REGNUM (regno))
23661 return mode == SImode;
23662
23663 if (IS_IWMMXT_REGNUM (regno))
23664 return VALID_IWMMXT_REG_MODE (mode);
23665 }
23666
23667 /* We allow almost any value to be stored in the general registers.
23668 Restrict doubleword quantities to even register pairs in ARM state
23669 so that we can use ldrd. Do not allow very large Neon structure
23670 opaque modes in general registers; they would use too many. */
23671 if (regno <= LAST_ARM_REGNUM)
23672 {
23673 if (ARM_NUM_REGS (mode) > 4)
23674 return false;
23675
23676 if (TARGET_THUMB2)
23677 return true;
23678
23679 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23680 }
23681
23682 if (regno == FRAME_POINTER_REGNUM
23683 || regno == ARG_POINTER_REGNUM)
23684 /* We only allow integers in the fake hard registers. */
23685 return GET_MODE_CLASS (mode) == MODE_INT;
23686
23687 return false;
23688 }
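
/* Illustrative examples (added comment): for the core registers, in ARM
   state with TARGET_LDRD a DImode value is accepted starting at the even
   register r0 but rejected starting at the odd register r1, so that
   LDRD/STRD can be used; in Thumb-1, any value wider than a word must start
   in r0-r6 so that a second low register is free for the upper part.  */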
23689
23690 /* Implement TARGET_MODES_TIEABLE_P. */
23691
23692 static bool
23693 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23694 {
23695 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23696 return true;
23697
23698 /* We specifically want to allow elements of "structure" modes to
23699 be tieable to the structure. This more general condition allows
23700 other rarer situations too. */
23701 if (TARGET_NEON
23702 && (VALID_NEON_DREG_MODE (mode1)
23703 || VALID_NEON_QREG_MODE (mode1)
23704 || VALID_NEON_STRUCT_MODE (mode1))
23705 && (VALID_NEON_DREG_MODE (mode2)
23706 || VALID_NEON_QREG_MODE (mode2)
23707 || VALID_NEON_STRUCT_MODE (mode2)))
23708 return true;
23709
23710 return false;
23711 }
23712
23713 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23714 not used in ARM mode. */
23715
23716 enum reg_class
23717 arm_regno_class (int regno)
23718 {
23719 if (regno == PC_REGNUM)
23720 return NO_REGS;
23721
23722 if (TARGET_THUMB1)
23723 {
23724 if (regno == STACK_POINTER_REGNUM)
23725 return STACK_REG;
23726 if (regno == CC_REGNUM)
23727 return CC_REG;
23728 if (regno < 8)
23729 return LO_REGS;
23730 return HI_REGS;
23731 }
23732
23733 if (TARGET_THUMB2 && regno < 8)
23734 return LO_REGS;
23735
23736 if ( regno <= LAST_ARM_REGNUM
23737 || regno == FRAME_POINTER_REGNUM
23738 || regno == ARG_POINTER_REGNUM)
23739 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23740
23741 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23742 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23743
23744 if (IS_VFP_REGNUM (regno))
23745 {
23746 if (regno <= D7_VFP_REGNUM)
23747 return VFP_D0_D7_REGS;
23748 else if (regno <= LAST_LO_VFP_REGNUM)
23749 return VFP_LO_REGS;
23750 else
23751 return VFP_HI_REGS;
23752 }
23753
23754 if (IS_IWMMXT_REGNUM (regno))
23755 return IWMMXT_REGS;
23756
23757 if (IS_IWMMXT_GR_REGNUM (regno))
23758 return IWMMXT_GR_REGS;
23759
23760 return NO_REGS;
23761 }
23762
23763 /* Handle a special case when computing the offset
23764 of an argument from the frame pointer. */
23765 int
23766 arm_debugger_arg_offset (int value, rtx addr)
23767 {
23768 rtx_insn *insn;
23769
23770 /* We are only interested if dbxout_parms() failed to compute the offset. */
23771 if (value != 0)
23772 return 0;
23773
23774 /* We can only cope with the case where the address is held in a register. */
23775 if (!REG_P (addr))
23776 return 0;
23777
23778 /* If we are using the frame pointer to point at the argument, then
23779 an offset of 0 is correct. */
23780 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23781 return 0;
23782
23783 /* If we are using the stack pointer to point at the
23784 argument, then an offset of 0 is correct. */
23785 /* ??? Check this is consistent with thumb2 frame layout. */
23786 if ((TARGET_THUMB || !frame_pointer_needed)
23787 && REGNO (addr) == SP_REGNUM)
23788 return 0;
23789
23790 /* Oh dear. The argument is pointed to by a register rather
23791 than being held in a register, or being stored at a known
23792 offset from the frame pointer. Since GDB only understands
23793 those two kinds of argument we must translate the address
23794 held in the register into an offset from the frame pointer.
23795 We do this by searching through the insns for the function
23796 looking to see where this register gets its value. If the
23797 register is initialized from the frame pointer plus an offset
23798 then we are in luck and we can continue, otherwise we give up.
23799
23800 This code is exercised by producing debugging information
23801 for a function with arguments like this:
23802
23803 double func (double a, double b, int c, double d) {return d;}
23804
23805 Without this code the stab for parameter 'd' will be set to
23806 an offset of 0 from the frame pointer, rather than 8. */
23807
23808 /* The if() statement says:
23809
23810 If the insn is a normal instruction
23811 and if the insn is setting the value in a register
23812 and if the register being set is the register holding the address of the argument
23813 and if the address is computed by an addition
23814 that involves adding to a register
23815 which is the frame pointer
23816 a constant integer
23817
23818 then... */
23819
23820 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23821 {
23822 if ( NONJUMP_INSN_P (insn)
23823 && GET_CODE (PATTERN (insn)) == SET
23824 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23825 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23826 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23827 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23828 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23829 )
23830 {
23831 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23832
23833 break;
23834 }
23835 }
23836
23837 if (value == 0)
23838 {
23839 debug_rtx (addr);
23840 warning (0, "unable to compute real location of stacked parameter");
23841 value = 8; /* XXX magic hack */
23842 }
23843
23844 return value;
23845 }
23846 \f
23847 /* Implement TARGET_PROMOTED_TYPE. */
23848
23849 static tree
23850 arm_promoted_type (const_tree t)
23851 {
23852 if (SCALAR_FLOAT_TYPE_P (t)
23853 && TYPE_PRECISION (t) == 16
23854 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23855 return float_type_node;
23856 return NULL_TREE;
23857 }
23858
23859 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23860 This simply adds HFmode as a supported mode; even though we don't
23861 implement arithmetic on this type directly, it's supported by
23862 optabs conversions, much the way the double-word arithmetic is
23863 special-cased in the default hook. */
23864
23865 static bool
23866 arm_scalar_mode_supported_p (scalar_mode mode)
23867 {
23868 if (mode == HFmode)
23869 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23870 else if (ALL_FIXED_POINT_MODE_P (mode))
23871 return true;
23872 else
23873 return default_scalar_mode_supported_p (mode);
23874 }
23875
23876 /* Set the value of FLT_EVAL_METHOD.
23877 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23878
23879 0: evaluate all operations and constants, whose semantic type has at
23880 most the range and precision of type float, to the range and
23881 precision of float; evaluate all other operations and constants to
23882 the range and precision of the semantic type;
23883
23884 N, where _FloatN is a supported interchange floating type
23885 evaluate all operations and constants, whose semantic type has at
23886 most the range and precision of _FloatN type, to the range and
23887 precision of the _FloatN type; evaluate all other operations and
23888 constants to the range and precision of the semantic type;
23889
23890 If we have the ARMv8.2-A extensions then we support _Float16 in native
23891 precision, so we should set this to 16. Otherwise, we support the type,
23892 but want to evaluate expressions in float precision, so set this to
23893 0. */
23894
23895 static enum flt_eval_method
23896 arm_excess_precision (enum excess_precision_type type)
23897 {
23898 switch (type)
23899 {
23900 case EXCESS_PRECISION_TYPE_FAST:
23901 case EXCESS_PRECISION_TYPE_STANDARD:
23902 /* We can calculate either in 16-bit range and precision or
23903 32-bit range and precision. Make that decision based on whether
23904 we have native support for the ARMv8.2-A 16-bit floating-point
23905 instructions or not. */
23906 return (TARGET_VFP_FP16INST
23907 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23908 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23909 case EXCESS_PRECISION_TYPE_IMPLICIT:
23910 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23911 default:
23912 gcc_unreachable ();
23913 }
23914 return FLT_EVAL_METHOD_UNPREDICTABLE;
23915 }
23916
23917
23918 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23919 _Float16 if we are using anything other than ieee format for 16-bit
23920 floating point. Otherwise, punt to the default implementation. */
23921 static opt_scalar_float_mode
23922 arm_floatn_mode (int n, bool extended)
23923 {
23924 if (!extended && n == 16)
23925 {
23926 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23927 return HFmode;
23928 return opt_scalar_float_mode ();
23929 }
23930
23931 return default_floatn_mode (n, extended);
23932 }
23933
23934
23935 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23936 not to early-clobber SRC registers in the process.
23937
23938 We assume that the operands described by SRC and DEST represent a
23939 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23940 number of components into which the copy has been decomposed. */
23941 void
23942 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23943 {
23944 unsigned int i;
23945
23946 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23947 || REGNO (operands[0]) < REGNO (operands[1]))
23948 {
23949 for (i = 0; i < count; i++)
23950 {
23951 operands[2 * i] = dest[i];
23952 operands[2 * i + 1] = src[i];
23953 }
23954 }
23955 else
23956 {
23957 for (i = 0; i < count; i++)
23958 {
23959 operands[2 * i] = dest[count - i - 1];
23960 operands[2 * i + 1] = src[count - i - 1];
23961 }
23962 }
23963 }
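
/* Worked example (added comment): copying d1/d2 into d2/d3 overlaps and the
   destination has the higher register number, so the second branch above
   lists the component moves in reverse order (d3 <- d2 first, then
   d2 <- d1); the caller then emits them in that order and no source is
   clobbered before it has been read.  */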
23964
23965 /* Split operands into moves from op[1] + op[2] into op[0]. */
23966
23967 void
23968 neon_split_vcombine (rtx operands[3])
23969 {
23970 unsigned int dest = REGNO (operands[0]);
23971 unsigned int src1 = REGNO (operands[1]);
23972 unsigned int src2 = REGNO (operands[2]);
23973 machine_mode halfmode = GET_MODE (operands[1]);
23974 unsigned int halfregs = REG_NREGS (operands[1]);
23975 rtx destlo, desthi;
23976
23977 if (src1 == dest && src2 == dest + halfregs)
23978 {
23979 /* No-op move. Can't split to nothing; emit something. */
23980 emit_note (NOTE_INSN_DELETED);
23981 return;
23982 }
23983
23984 /* Preserve register attributes for variable tracking. */
23985 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23986 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23987 GET_MODE_SIZE (halfmode));
23988
23989 /* Special case of reversed high/low parts. Use VSWP. */
23990 if (src2 == dest && src1 == dest + halfregs)
23991 {
23992 rtx x = gen_rtx_SET (destlo, operands[1]);
23993 rtx y = gen_rtx_SET (desthi, operands[2]);
23994 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23995 return;
23996 }
23997
23998 if (!reg_overlap_mentioned_p (operands[2], destlo))
23999 {
24000 /* Try to avoid unnecessary moves if part of the result
24001 is in the right place already. */
24002 if (src1 != dest)
24003 emit_move_insn (destlo, operands[1]);
24004 if (src2 != dest + halfregs)
24005 emit_move_insn (desthi, operands[2]);
24006 }
24007 else
24008 {
24009 if (src2 != dest + halfregs)
24010 emit_move_insn (desthi, operands[2]);
24011 if (src1 != dest)
24012 emit_move_insn (destlo, operands[1]);
24013 }
24014 }
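
/* Added note for illustration: three cases are handled above.  If the two
   sources are already the two halves of the destination in the right order,
   only a deleted-insn note is emitted; if they are the two halves in swapped
   order, a single parallel of both sets is emitted (intended to match a VSWP
   pattern, per the comment above); otherwise ordinary moves are emitted,
   ordered so that writing the low half cannot clobber the second source
   before it is read.  */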
24015 \f
24016 /* Return the number (counting from 0) of
24017 the least significant set bit in MASK. */
24018
24019 inline static int
24020 number_of_first_bit_set (unsigned mask)
24021 {
24022 return ctz_hwi (mask);
24023 }
24024
24025 /* Like emit_multi_reg_push, but allowing for a different set of
24026 registers to be described as saved. MASK is the set of registers
24027 to be saved; REAL_REGS is the set of registers to be described as
24028 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24029
24030 static rtx_insn *
24031 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24032 {
24033 unsigned long regno;
24034 rtx par[10], tmp, reg;
24035 rtx_insn *insn;
24036 int i, j;
24037
24038 /* Build the parallel of the registers actually being stored. */
24039 for (i = 0; mask; ++i, mask &= mask - 1)
24040 {
24041 regno = ctz_hwi (mask);
24042 reg = gen_rtx_REG (SImode, regno);
24043
24044 if (i == 0)
24045 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24046 else
24047 tmp = gen_rtx_USE (VOIDmode, reg);
24048
24049 par[i] = tmp;
24050 }
24051
24052 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24053 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24054 tmp = gen_frame_mem (BLKmode, tmp);
24055 tmp = gen_rtx_SET (tmp, par[0]);
24056 par[0] = tmp;
24057
24058 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24059 insn = emit_insn (tmp);
24060
24061 /* Always build the stack adjustment note for unwind info. */
24062 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24063 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24064 par[0] = tmp;
24065
24066 /* Build the parallel of the registers recorded as saved for unwind. */
24067 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24068 {
24069 regno = ctz_hwi (real_regs);
24070 reg = gen_rtx_REG (SImode, regno);
24071
24072 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24073 tmp = gen_frame_mem (SImode, tmp);
24074 tmp = gen_rtx_SET (tmp, reg);
24075 RTX_FRAME_RELATED_P (tmp) = 1;
24076 par[j + 1] = tmp;
24077 }
24078
24079 if (j == 0)
24080 tmp = par[0];
24081 else
24082 {
24083 RTX_FRAME_RELATED_P (par[0]) = 1;
24084 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24085 }
24086
24087 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24088
24089 return insn;
24090 }
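
/* Example (added comment; register numbers assume the usual core register
   numbering with LR == r14): thumb1_emit_multi_reg_push (0x4070, 0x4070)
   pushes r4, r5, r6 and lr.  The emitted insn is a parallel whose first
   element stores the block at the pre-modified stack pointer (sp := sp - 16),
   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as four individual SImode saves at sp+0 ... sp+12 plus the 16-byte stack
   adjustment, which is the form the unwind machinery expects.  */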
24091
24092 /* Emit code to push or pop registers to or from the stack. F is the
24093 assembly file. MASK is the registers to pop. */
24094 static void
24095 thumb_pop (FILE *f, unsigned long mask)
24096 {
24097 int regno;
24098 int lo_mask = mask & 0xFF;
24099
24100 gcc_assert (mask);
24101
24102 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24103 {
24104 /* Special case. Do not generate a POP PC statement here, do it in
24105 thumb_exit(). */
24106 thumb_exit (f, -1);
24107 return;
24108 }
24109
24110 fprintf (f, "\tpop\t{");
24111
24112 /* Look at the low registers first. */
24113 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24114 {
24115 if (lo_mask & 1)
24116 {
24117 asm_fprintf (f, "%r", regno);
24118
24119 if ((lo_mask & ~1) != 0)
24120 fprintf (f, ", ");
24121 }
24122 }
24123
24124 if (mask & (1 << PC_REGNUM))
24125 {
24126 /* Catch popping the PC. */
24127 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24128 || IS_CMSE_ENTRY (arm_current_func_type ()))
24129 {
24130 /* The PC is never popped directly; instead
24131 it is popped into r3 and then BX is used. */
24132 fprintf (f, "}\n");
24133
24134 thumb_exit (f, -1);
24135
24136 return;
24137 }
24138 else
24139 {
24140 if (mask & 0xFF)
24141 fprintf (f, ", ");
24142
24143 asm_fprintf (f, "%r", PC_REGNUM);
24144 }
24145 }
24146
24147 fprintf (f, "}\n");
24148 }
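
/* Examples (added comment): thumb_pop (f, 0x0011) emits "pop {r0, r4}";
   thumb_pop (f, 0x8010) emits "pop {r4, pc}" when popping the PC directly is
   allowed, and otherwise emits "pop {r4}" and defers the actual return to
   thumb_exit.  */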
24149
24150 /* Generate code to return from a thumb function.
24151 If 'reg_containing_return_addr' is -1, then the return address is
24152 actually on the stack, at the stack pointer.
24153
24154 Note: do not forget to update length attribute of corresponding insn pattern
24155 when changing assembly output (e.g. length attribute of epilogue_insns when
24156 updating Armv8-M Baseline Security Extensions register clearing
24157 sequences). */
24158 static void
24159 thumb_exit (FILE *f, int reg_containing_return_addr)
24160 {
24161 unsigned regs_available_for_popping;
24162 unsigned regs_to_pop;
24163 int pops_needed;
24164 unsigned available;
24165 unsigned required;
24166 machine_mode mode;
24167 int size;
24168 int restore_a4 = FALSE;
24169
24170 /* Compute the registers we need to pop. */
24171 regs_to_pop = 0;
24172 pops_needed = 0;
24173
24174 if (reg_containing_return_addr == -1)
24175 {
24176 regs_to_pop |= 1 << LR_REGNUM;
24177 ++pops_needed;
24178 }
24179
24180 if (TARGET_BACKTRACE)
24181 {
24182 /* Restore the (ARM) frame pointer and stack pointer. */
24183 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24184 pops_needed += 2;
24185 }
24186
24187 /* If there is nothing to pop then just emit the BX instruction and
24188 return. */
24189 if (pops_needed == 0)
24190 {
24191 if (crtl->calls_eh_return)
24192 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24193
24194 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24195 {
24196 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24197 reg_containing_return_addr);
24198 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24199 }
24200 else
24201 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24202 return;
24203 }
24204 /* Otherwise if we are not supporting interworking and we have not created
24205 a backtrace structure and the function was not entered in ARM mode then
24206 just pop the return address straight into the PC. */
24207 else if (!TARGET_INTERWORK
24208 && !TARGET_BACKTRACE
24209 && !is_called_in_ARM_mode (current_function_decl)
24210 && !crtl->calls_eh_return
24211 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24212 {
24213 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24214 return;
24215 }
24216
24217 /* Find out how many of the (return) argument registers we can corrupt. */
24218 regs_available_for_popping = 0;
24219
24220 /* If returning via __builtin_eh_return, the bottom three registers
24221 all contain information needed for the return. */
24222 if (crtl->calls_eh_return)
24223 size = 12;
24224 else
24225 {
24226 /* See if we can deduce the registers used from the function's
24227 return value. This is more reliable than examining
24228 df_regs_ever_live_p () because that will be set if the register is
24229 ever used in the function, not just if the register is used
24230 to hold a return value. */
24231
24232 if (crtl->return_rtx != 0)
24233 mode = GET_MODE (crtl->return_rtx);
24234 else
24235 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24236
24237 size = GET_MODE_SIZE (mode);
24238
24239 if (size == 0)
24240 {
24241 /* In a void function we can use any argument register.
24242 In a function that returns a structure on the stack
24243 we can use the second and third argument registers. */
24244 if (mode == VOIDmode)
24245 regs_available_for_popping =
24246 (1 << ARG_REGISTER (1))
24247 | (1 << ARG_REGISTER (2))
24248 | (1 << ARG_REGISTER (3));
24249 else
24250 regs_available_for_popping =
24251 (1 << ARG_REGISTER (2))
24252 | (1 << ARG_REGISTER (3));
24253 }
24254 else if (size <= 4)
24255 regs_available_for_popping =
24256 (1 << ARG_REGISTER (2))
24257 | (1 << ARG_REGISTER (3));
24258 else if (size <= 8)
24259 regs_available_for_popping =
24260 (1 << ARG_REGISTER (3));
24261 }
24262
24263 /* Match registers to be popped with registers into which we pop them. */
24264 for (available = regs_available_for_popping,
24265 required = regs_to_pop;
24266 required != 0 && available != 0;
24267 available &= ~(available & - available),
24268 required &= ~(required & - required))
24269 -- pops_needed;
24270
24271 /* If we have any popping registers left over, remove them. */
24272 if (available > 0)
24273 regs_available_for_popping &= ~available;
24274
24275 /* Otherwise if we need another popping register we can use
24276 the fourth argument register. */
24277 else if (pops_needed)
24278 {
24279 /* If we have not found any free argument registers and
24280 reg a4 contains the return address, we must move it. */
24281 if (regs_available_for_popping == 0
24282 && reg_containing_return_addr == LAST_ARG_REGNUM)
24283 {
24284 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24285 reg_containing_return_addr = LR_REGNUM;
24286 }
24287 else if (size > 12)
24288 {
24289 /* Register a4 is being used to hold part of the return value,
24290 but we have dire need of a free, low register. */
24291 restore_a4 = TRUE;
24292
24293 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24294 }
24295
24296 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24297 {
24298 /* The fourth argument register is available. */
24299 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24300
24301 --pops_needed;
24302 }
24303 }
24304
24305 /* Pop as many registers as we can. */
24306 thumb_pop (f, regs_available_for_popping);
24307
24308 /* Process the registers we popped. */
24309 if (reg_containing_return_addr == -1)
24310 {
24311 /* The return address was popped into the lowest numbered register. */
24312 regs_to_pop &= ~(1 << LR_REGNUM);
24313
24314 reg_containing_return_addr =
24315 number_of_first_bit_set (regs_available_for_popping);
24316
24317 /* Remove this register from the mask of available registers, so that
24318 the return address will not be corrupted by further pops. */
24319 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24320 }
24321
24322 /* If we popped other registers then handle them here. */
24323 if (regs_available_for_popping)
24324 {
24325 int frame_pointer;
24326
24327 /* Work out which register currently contains the frame pointer. */
24328 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24329
24330 /* Move it into the correct place. */
24331 asm_fprintf (f, "\tmov\t%r, %r\n",
24332 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24333
24334 /* (Temporarily) remove it from the mask of popped registers. */
24335 regs_available_for_popping &= ~(1 << frame_pointer);
24336 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24337
24338 if (regs_available_for_popping)
24339 {
24340 int stack_pointer;
24341
24342 /* We popped the stack pointer as well;
24343 find the register that contains it. */
24344 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24345
24346 /* Move it into the stack register. */
24347 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24348
24349 /* At this point we have popped all necessary registers, so
24350 do not worry about restoring regs_available_for_popping
24351 to its correct value:
24352
24353 assert (pops_needed == 0)
24354 assert (regs_available_for_popping == (1 << frame_pointer))
24355 assert (regs_to_pop == (1 << STACK_POINTER)) */
24356 }
24357 else
24358 {
24359 /* Since we have just moved the popped value into the frame
24360 pointer, the popping register is available for reuse, and
24361 we know that we still have the stack pointer left to pop. */
24362 regs_available_for_popping |= (1 << frame_pointer);
24363 }
24364 }
24365
24366 /* If we still have registers left on the stack, but we no longer have
24367 any registers into which we can pop them, then we must move the return
24368 address into the link register and make available the register that
24369 contained it. */
24370 if (regs_available_for_popping == 0 && pops_needed > 0)
24371 {
24372 regs_available_for_popping |= 1 << reg_containing_return_addr;
24373
24374 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24375 reg_containing_return_addr);
24376
24377 reg_containing_return_addr = LR_REGNUM;
24378 }
24379
24380 /* If we have registers left on the stack then pop some more.
24381 We know that at most we will want to pop FP and SP. */
24382 if (pops_needed > 0)
24383 {
24384 int popped_into;
24385 int move_to;
24386
24387 thumb_pop (f, regs_available_for_popping);
24388
24389 /* We have popped either FP or SP.
24390 Move whichever one it is into the correct register. */
24391 popped_into = number_of_first_bit_set (regs_available_for_popping);
24392 move_to = number_of_first_bit_set (regs_to_pop);
24393
24394 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24395 --pops_needed;
24396 }
24397
24398 /* If we still have not popped everything then we must have only
24399 had one register available to us and we are now popping the SP. */
24400 if (pops_needed > 0)
24401 {
24402 int popped_into;
24403
24404 thumb_pop (f, regs_available_for_popping);
24405
24406 popped_into = number_of_first_bit_set (regs_available_for_popping);
24407
24408 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24409 /*
24410 assert (regs_to_pop == (1 << STACK_POINTER))
24411 assert (pops_needed == 1)
24412 */
24413 }
24414
24415 /* If necessary restore the a4 register. */
24416 if (restore_a4)
24417 {
24418 if (reg_containing_return_addr != LR_REGNUM)
24419 {
24420 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24421 reg_containing_return_addr = LR_REGNUM;
24422 }
24423
24424 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24425 }
24426
24427 if (crtl->calls_eh_return)
24428 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24429
24430 /* Return to caller. */
24431 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24432 {
24433 /* This is for the cases where LR is not being used to contain the return
24434 address. It may therefore contain information that we might not want
24435 to leak, hence it must be cleared. The value in R0 will never be a
24436 secret at this point, so it is safe to use it, see the clearing code
24437 in 'cmse_nonsecure_entry_clear_before_return'. */
24438 if (reg_containing_return_addr != LR_REGNUM)
24439 asm_fprintf (f, "\tmov\tlr, r0\n");
24440
24441 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24442 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24443 }
24444 else
24445 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24446 }
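
/* Illustrative example (added comment): when the return address is on the
   stack (reg_containing_return_addr == -1) and there is no interworking,
   backtrace structure, EH return or CMSE entry to worry about, the early
   path above emits just "pop {pc}".  The longer paths shuffle the saved
   values through whichever argument registers may be corrupted and finally
   return with "bx" (or "bxns" for CMSE entry functions).  */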
24447 \f
24448 /* Scan INSN just before assembler is output for it.
24449 For Thumb-1, we track the status of the condition codes; this
24450 information is used in the cbranchsi4_insn pattern. */
24451 void
24452 thumb1_final_prescan_insn (rtx_insn *insn)
24453 {
24454 if (flag_print_asm_name)
24455 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24456 INSN_ADDRESSES (INSN_UID (insn)));
24457 /* Don't overwrite the previous setter when we get to a cbranch. */
24458 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24459 {
24460 enum attr_conds conds;
24461
24462 if (cfun->machine->thumb1_cc_insn)
24463 {
24464 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24465 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24466 CC_STATUS_INIT;
24467 }
24468 conds = get_attr_conds (insn);
24469 if (conds == CONDS_SET)
24470 {
24471 rtx set = single_set (insn);
24472 cfun->machine->thumb1_cc_insn = insn;
24473 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24474 cfun->machine->thumb1_cc_op1 = const0_rtx;
24475 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24476 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24477 {
24478 rtx src1 = XEXP (SET_SRC (set), 1);
24479 if (src1 == const0_rtx)
24480 cfun->machine->thumb1_cc_mode = CCmode;
24481 }
24482 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24483 {
24484 /* Record the src register operand instead of dest because
24485 cprop_hardreg pass propagates src. */
24486 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24487 }
24488 }
24489 else if (conds != CONDS_NOCOND)
24490 cfun->machine->thumb1_cc_insn = NULL_RTX;
24491 }
24492
24493 /* Check if an unexpected far jump is used. */
24494 if (cfun->machine->lr_save_eliminated
24495 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24496 internal_error("Unexpected thumb1 far jump");
24497 }
24498
24499 int
24500 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24501 {
24502 unsigned HOST_WIDE_INT mask = 0xff;
24503 int i;
24504
24505 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24506 if (val == 0) /* XXX */
24507 return 0;
24508
24509 for (i = 0; i < 25; i++)
24510 if ((val & (mask << i)) == val)
24511 return 1;
24512
24513 return 0;
24514 }
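
/* Examples (added comment): 0xff0 (0xff << 4) and 0x4000000 (a single bit)
   are shiftable constants, while 0x101 is not, because bits 0 and 8 cannot
   be covered by one contiguous 8-bit window.  */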
24515
24516 /* Returns nonzero if the current function contains,
24517 or might contain a far jump. */
24518 static int
24519 thumb_far_jump_used_p (void)
24520 {
24521 rtx_insn *insn;
24522 bool far_jump = false;
24523 unsigned int func_size = 0;
24524
24525 /* If we have already decided that far jumps may be used,
24526 do not bother checking again, and always return true even if
24527 it turns out that they are not being used. Once we have made
24528 the decision that far jumps are present (and that hence the link
24529 register will be pushed onto the stack) we cannot go back on it. */
24530 if (cfun->machine->far_jump_used)
24531 return 1;
24532
24533 /* If this function is not being called from the prologue/epilogue
24534 generation code then it must be being called from the
24535 INITIAL_ELIMINATION_OFFSET macro. */
24536 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24537 {
24538 /* In this case we know that we are being asked about the elimination
24539 of the arg pointer register. If that register is not being used,
24540 then there are no arguments on the stack, and we do not have to
24541 worry that a far jump might force the prologue to push the link
24542 register, changing the stack offsets. In this case we can just
24543 return false, since the presence of far jumps in the function will
24544 not affect stack offsets.
24545
24546 If the arg pointer is live (or if it was live, but has now been
24547 eliminated and so set to dead) then we do have to test to see if
24548 the function might contain a far jump. This test can lead to some
24549 false negatives, since before reload is completed the length of
24550 branch instructions is not known, so gcc defaults to returning their
24551 longest length, which in turn sets the far jump attribute to true.
24552
24553 A false negative will not result in bad code being generated, but it
24554 will result in a needless push and pop of the link register. We
24555 hope that this does not occur too often.
24556
24557 If we need doubleword stack alignment this could affect the other
24558 elimination offsets so we can't risk getting it wrong. */
24559 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24560 cfun->machine->arg_pointer_live = 1;
24561 else if (!cfun->machine->arg_pointer_live)
24562 return 0;
24563 }
24564
24565 /* We should not change far_jump_used during or after reload, as there is
24566 no chance to change stack frame layout. */
24567 if (reload_in_progress || reload_completed)
24568 return 0;
24569
24570 /* Check to see if the function contains a branch
24571 insn with the far jump attribute set. */
24572 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24573 {
24574 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24575 {
24576 far_jump = true;
24577 }
24578 func_size += get_attr_length (insn);
24579 }
24580
24581 /* The far_jump attribute will always be true for thumb1 before the
24582 shorten_branch pass, so checking the far_jump attribute before
24583 shorten_branch isn't very useful.
24584
24585 The following heuristic tries to estimate more accurately whether a far
24586 jump may finally be used. The heuristic is very conservative, as there
24587 is no chance to roll back the decision not to use a far jump.
24588
24589 Thumb1 long branch offsets range from -2048 to 2046. In the worst case
24590 each 2-byte insn is associated with a 4-byte constant pool entry, so
24591 using function size 2048/3 as the threshold is conservative enough. */
24592 if (far_jump)
24593 {
24594 if ((func_size * 3) >= 2048)
24595 {
24596 /* Record the fact that we have decided that
24597 the function does use far jumps. */
24598 cfun->machine->far_jump_used = 1;
24599 return 1;
24600 }
24601 }
24602
24603 return 0;
24604 }
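
/* Added note: with the heuristic above, once the estimated function size
   reaches about 683 bytes (2048/3) and at least one branch still carries the
   far_jump attribute, far_jump_used is latched to 1 and stays set, so the
   prologue will save LR even if branch shortening later proves every branch
   to be short.  */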
24605
24606 /* Return nonzero if FUNC must be entered in ARM mode. */
24607 static bool
24608 is_called_in_ARM_mode (tree func)
24609 {
24610 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24611
24612 /* Ignore the problem about functions whose address is taken. */
24613 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24614 return true;
24615
24616 #ifdef ARM_PE
24617 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24618 #else
24619 return false;
24620 #endif
24621 }
24622
24623 /* Given the stack offsets and register mask in OFFSETS, decide how
24624 many additional registers to push instead of subtracting a constant
24625 from SP. For epilogues the principle is the same except we use pop.
24626 FOR_PROLOGUE indicates which we're generating. */
24627 static int
24628 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24629 {
24630 HOST_WIDE_INT amount;
24631 unsigned long live_regs_mask = offsets->saved_regs_mask;
24632 /* Extract a mask of the ones we can give to the Thumb's push/pop
24633 instruction. */
24634 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24635 /* Then count how many other high registers will need to be pushed. */
24636 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24637 int n_free, reg_base, size;
24638
24639 if (!for_prologue && frame_pointer_needed)
24640 amount = offsets->locals_base - offsets->saved_regs;
24641 else
24642 amount = offsets->outgoing_args - offsets->saved_regs;
24643
24644 /* If the stack frame size is 512 exactly, we can save one load
24645 instruction, which should make this a win even when optimizing
24646 for speed. */
24647 if (!optimize_size && amount != 512)
24648 return 0;
24649
24650 /* Can't do this if there are high registers to push. */
24651 if (high_regs_pushed != 0)
24652 return 0;
24653
24654 /* Shouldn't do it in the prologue if no registers would normally
24655 be pushed at all. In the epilogue, also allow it if we'll have
24656 a pop insn for the PC. */
24657 if (l_mask == 0
24658 && (for_prologue
24659 || TARGET_BACKTRACE
24660 || (live_regs_mask & 1 << LR_REGNUM) == 0
24661 || TARGET_INTERWORK
24662 || crtl->args.pretend_args_size != 0))
24663 return 0;
24664
24665 /* Don't do this if thumb_expand_prologue wants to emit instructions
24666 between the push and the stack frame allocation. */
24667 if (for_prologue
24668 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24669 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24670 return 0;
24671
24672 reg_base = 0;
24673 n_free = 0;
24674 if (!for_prologue)
24675 {
24676 size = arm_size_return_regs ();
24677 reg_base = ARM_NUM_INTS (size);
24678 live_regs_mask >>= reg_base;
24679 }
24680
24681 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24682 && (for_prologue || call_used_regs[reg_base + n_free]))
24683 {
24684 live_regs_mask >>= 1;
24685 n_free++;
24686 }
24687
24688 if (n_free == 0)
24689 return 0;
24690 gcc_assert (amount / 4 * 4 == amount);
24691
24692 if (amount >= 512 && (amount - n_free * 4) < 512)
24693 return (amount - 508) / 4;
24694 if (amount <= n_free * 4)
24695 return amount / 4;
24696 return 0;
24697 }
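
/* Worked example (added comment): when optimizing for speed only a frame of
   exactly 512 bytes qualifies.  With one suitable free low register the
   function returns (512 - 508) / 4 == 1, so one extra register is pushed (or
   popped) and the remaining 508-byte adjustment fits the single Thumb-1
   "sub sp, #imm" / "add sp, #imm" encoding, whose immediate is limited to
   508.  */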
24698
24699 /* The bits which aren't usefully expanded as rtl. */
24700 const char *
24701 thumb1_unexpanded_epilogue (void)
24702 {
24703 arm_stack_offsets *offsets;
24704 int regno;
24705 unsigned long live_regs_mask = 0;
24706 int high_regs_pushed = 0;
24707 int extra_pop;
24708 int had_to_push_lr;
24709 int size;
24710
24711 if (cfun->machine->return_used_this_function != 0)
24712 return "";
24713
24714 if (IS_NAKED (arm_current_func_type ()))
24715 return "";
24716
24717 offsets = arm_get_frame_offsets ();
24718 live_regs_mask = offsets->saved_regs_mask;
24719 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24720
24721 /* See if we can deduce the registers used from the function's return value.
24722 This is more reliable than examining df_regs_ever_live_p () because that
24723 will be set if the register is ever used in the function, not just if
24724 the register is used to hold a return value. */
24725 size = arm_size_return_regs ();
24726
24727 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24728 if (extra_pop > 0)
24729 {
24730 unsigned long extra_mask = (1 << extra_pop) - 1;
24731 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24732 }
24733
24734 /* The prologue may have pushed some high registers to use as
24735 work registers, e.g. the testsuite file:
24736 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24737 compiles to produce:
24738 push {r4, r5, r6, r7, lr}
24739 mov r7, r9
24740 mov r6, r8
24741 push {r6, r7}
24742 as part of the prologue. We have to undo that pushing here. */
24743
24744 if (high_regs_pushed)
24745 {
24746 unsigned long mask = live_regs_mask & 0xff;
24747 int next_hi_reg;
24748
24749 /* The available low registers depend on the size of the value we are
24750 returning. */
24751 if (size <= 12)
24752 mask |= 1 << 3;
24753 if (size <= 8)
24754 mask |= 1 << 2;
24755
24756 if (mask == 0)
24757 /* Oh dear! We have no low registers into which we can pop
24758 high registers! */
24759 internal_error
24760 ("no low registers available for popping high registers");
24761
24762 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24763 if (live_regs_mask & (1 << next_hi_reg))
24764 break;
24765
24766 while (high_regs_pushed)
24767 {
24768 /* Find lo register(s) into which the high register(s) can
24769 be popped. */
24770 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24771 {
24772 if (mask & (1 << regno))
24773 high_regs_pushed--;
24774 if (high_regs_pushed == 0)
24775 break;
24776 }
24777
24778 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24779
24780 /* Pop the values into the low register(s). */
24781 thumb_pop (asm_out_file, mask);
24782
24783 /* Move the value(s) into the high registers. */
24784 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24785 {
24786 if (mask & (1 << regno))
24787 {
24788 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24789 regno);
24790
24791 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24792 if (live_regs_mask & (1 << next_hi_reg))
24793 break;
24794 }
24795 }
24796 }
24797 live_regs_mask &= ~0x0f00;
24798 }
24799
24800 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24801 live_regs_mask &= 0xff;
24802
24803 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24804 {
24805 /* Pop the return address into the PC. */
24806 if (had_to_push_lr)
24807 live_regs_mask |= 1 << PC_REGNUM;
24808
24809 /* Either no argument registers were pushed or a backtrace
24810 structure was created which includes an adjusted stack
24811 pointer, so just pop everything. */
24812 if (live_regs_mask)
24813 thumb_pop (asm_out_file, live_regs_mask);
24814
24815 /* We have either just popped the return address into the
24816 PC or it was kept in LR for the entire function.
24817 Note that thumb_pop has already called thumb_exit if the
24818 PC was in the list. */
24819 if (!had_to_push_lr)
24820 thumb_exit (asm_out_file, LR_REGNUM);
24821 }
24822 else
24823 {
24824 /* Pop everything but the return address. */
24825 if (live_regs_mask)
24826 thumb_pop (asm_out_file, live_regs_mask);
24827
24828 if (had_to_push_lr)
24829 {
24830 if (size > 12)
24831 {
24832 /* We have no free low regs, so save one. */
24833 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24834 LAST_ARG_REGNUM);
24835 }
24836
24837 /* Get the return address into a temporary register. */
24838 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24839
24840 if (size > 12)
24841 {
24842 /* Move the return address to lr. */
24843 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24844 LAST_ARG_REGNUM);
24845 /* Restore the low register. */
24846 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24847 IP_REGNUM);
24848 regno = LR_REGNUM;
24849 }
24850 else
24851 regno = LAST_ARG_REGNUM;
24852 }
24853 else
24854 regno = LR_REGNUM;
24855
24856 /* Remove the argument registers that were pushed onto the stack. */
24857 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24858 SP_REGNUM, SP_REGNUM,
24859 crtl->args.pretend_args_size);
24860
24861 thumb_exit (asm_out_file, regno);
24862 }
24863
24864 return "";
24865 }
24866
24867 /* Functions to save and restore machine-specific function data. */
24868 static struct machine_function *
24869 arm_init_machine_status (void)
24870 {
24871 struct machine_function *machine;
24872 machine = ggc_cleared_alloc<machine_function> ();
24873
24874 #if ARM_FT_UNKNOWN != 0
24875 machine->func_type = ARM_FT_UNKNOWN;
24876 #endif
24877 machine->static_chain_stack_bytes = -1;
24878 return machine;
24879 }
24880
24881 /* Return an RTX indicating where the return address to the
24882 calling function can be found. */
24883 rtx
24884 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24885 {
24886 if (count != 0)
24887 return NULL_RTX;
24888
24889 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24890 }
24891
24892 /* Do anything needed before RTL is emitted for each function. */
24893 void
24894 arm_init_expanders (void)
24895 {
24896 /* Arrange to initialize and mark the machine per-function status. */
24897 init_machine_status = arm_init_machine_status;
24898
24899 /* This is to stop the combine pass optimizing away the alignment
24900 adjustment of va_arg. */
24901 /* ??? It is claimed that this should not be necessary. */
24902 if (cfun)
24903 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24904 }
24905
24906 /* Check that FUNC is called with a different mode. */
24907
24908 bool
24909 arm_change_mode_p (tree func)
24910 {
24911 if (TREE_CODE (func) != FUNCTION_DECL)
24912 return false;
24913
24914 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24915
24916 if (!callee_tree)
24917 callee_tree = target_option_default_node;
24918
24919 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24920 int flags = callee_opts->x_target_flags;
24921
24922 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24923 }
24924
24925 /* Like arm_compute_initial_elimination_offset. Simpler because there
24926 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24927 to point at the base of the local variables after static stack
24928 space for a function has been allocated. */
24929
24930 HOST_WIDE_INT
24931 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24932 {
24933 arm_stack_offsets *offsets;
24934
24935 offsets = arm_get_frame_offsets ();
24936
24937 switch (from)
24938 {
24939 case ARG_POINTER_REGNUM:
24940 switch (to)
24941 {
24942 case STACK_POINTER_REGNUM:
24943 return offsets->outgoing_args - offsets->saved_args;
24944
24945 case FRAME_POINTER_REGNUM:
24946 return offsets->soft_frame - offsets->saved_args;
24947
24948 case ARM_HARD_FRAME_POINTER_REGNUM:
24949 return offsets->saved_regs - offsets->saved_args;
24950
24951 case THUMB_HARD_FRAME_POINTER_REGNUM:
24952 return offsets->locals_base - offsets->saved_args;
24953
24954 default:
24955 gcc_unreachable ();
24956 }
24957 break;
24958
24959 case FRAME_POINTER_REGNUM:
24960 switch (to)
24961 {
24962 case STACK_POINTER_REGNUM:
24963 return offsets->outgoing_args - offsets->soft_frame;
24964
24965 case ARM_HARD_FRAME_POINTER_REGNUM:
24966 return offsets->saved_regs - offsets->soft_frame;
24967
24968 case THUMB_HARD_FRAME_POINTER_REGNUM:
24969 return offsets->locals_base - offsets->soft_frame;
24970
24971 default:
24972 gcc_unreachable ();
24973 }
24974 break;
24975
24976 default:
24977 gcc_unreachable ();
24978 }
24979 }
24980
24981 /* Generate the function's prologue. */
24982
24983 void
24984 thumb1_expand_prologue (void)
24985 {
24986 rtx_insn *insn;
24987
24988 HOST_WIDE_INT amount;
24989 HOST_WIDE_INT size;
24990 arm_stack_offsets *offsets;
24991 unsigned long func_type;
24992 int regno;
24993 unsigned long live_regs_mask;
24994 unsigned long l_mask;
24995 unsigned high_regs_pushed = 0;
24996 bool lr_needs_saving;
24997
24998 func_type = arm_current_func_type ();
24999
25000 /* Naked functions don't have prologues. */
25001 if (IS_NAKED (func_type))
25002 {
25003 if (flag_stack_usage_info)
25004 current_function_static_stack_size = 0;
25005 return;
25006 }
25007
25008 if (IS_INTERRUPT (func_type))
25009 {
25010 error ("interrupt Service Routines cannot be coded in Thumb mode");
25011 return;
25012 }
25013
25014 if (is_called_in_ARM_mode (current_function_decl))
25015 emit_insn (gen_prologue_thumb1_interwork ());
25016
25017 offsets = arm_get_frame_offsets ();
25018 live_regs_mask = offsets->saved_regs_mask;
25019 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25020
25021 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25022 l_mask = live_regs_mask & 0x40ff;
25023 /* Then count how many other high registers will need to be pushed. */
25024 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25025
25026 if (crtl->args.pretend_args_size)
25027 {
25028 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25029
25030 if (cfun->machine->uses_anonymous_args)
25031 {
25032 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25033 unsigned long mask;
25034
25035 mask = 1ul << (LAST_ARG_REGNUM + 1);
25036 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25037
25038 insn = thumb1_emit_multi_reg_push (mask, 0);
25039 }
25040 else
25041 {
25042 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25043 stack_pointer_rtx, x));
25044 }
25045 RTX_FRAME_RELATED_P (insn) = 1;
25046 }
25047
25048 if (TARGET_BACKTRACE)
25049 {
25050 HOST_WIDE_INT offset = 0;
25051 unsigned work_register;
25052 rtx work_reg, x, arm_hfp_rtx;
25053
25054 /* We have been asked to create a stack backtrace structure.
25055 The code looks like this:
25056
25057 0 .align 2
25058 0 func:
25059 0 sub SP, #16 Reserve space for 4 registers.
25060 2 push {R7} Push low registers.
25061 4 add R7, SP, #20 Get the stack pointer before the push.
25062 6 str R7, [SP, #8] Store the stack pointer
25063 (before reserving the space).
25064 8 mov R7, PC Get hold of the start of this code + 12.
25065 10 str R7, [SP, #16] Store it.
25066 12 mov R7, FP Get hold of the current frame pointer.
25067 14 str R7, [SP, #4] Store it.
25068 16 mov R7, LR Get hold of the current return address.
25069 18 str R7, [SP, #12] Store it.
25070 20 add R7, SP, #16 Point at the start of the
25071 backtrace structure.
25072 22 mov FP, R7 Put this value into the frame pointer. */
25073
25074 work_register = thumb_find_work_register (live_regs_mask);
25075 work_reg = gen_rtx_REG (SImode, work_register);
25076 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25077
25078 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25079 stack_pointer_rtx, GEN_INT (-16)));
25080 RTX_FRAME_RELATED_P (insn) = 1;
25081
25082 if (l_mask)
25083 {
25084 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25085 RTX_FRAME_RELATED_P (insn) = 1;
25086 lr_needs_saving = false;
25087
25088 offset = bit_count (l_mask) * UNITS_PER_WORD;
25089 }
25090
25091 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25092 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25093
25094 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25095 x = gen_frame_mem (SImode, x);
25096 emit_move_insn (x, work_reg);
25097
25098 /* Make sure that the instruction fetching the PC is in the right place
25099 to calculate "start of backtrace creation code + 12". */
25100 /* ??? The stores using the common WORK_REG ought to be enough to
25101 prevent the scheduler from doing anything weird. Failing that
25102 we could always move all of the following into an UNSPEC_VOLATILE. */
25103 if (l_mask)
25104 {
25105 x = gen_rtx_REG (SImode, PC_REGNUM);
25106 emit_move_insn (work_reg, x);
25107
25108 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25109 x = gen_frame_mem (SImode, x);
25110 emit_move_insn (x, work_reg);
25111
25112 emit_move_insn (work_reg, arm_hfp_rtx);
25113
25114 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25115 x = gen_frame_mem (SImode, x);
25116 emit_move_insn (x, work_reg);
25117 }
25118 else
25119 {
25120 emit_move_insn (work_reg, arm_hfp_rtx);
25121
25122 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25123 x = gen_frame_mem (SImode, x);
25124 emit_move_insn (x, work_reg);
25125
25126 x = gen_rtx_REG (SImode, PC_REGNUM);
25127 emit_move_insn (work_reg, x);
25128
25129 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25130 x = gen_frame_mem (SImode, x);
25131 emit_move_insn (x, work_reg);
25132 }
25133
25134 x = gen_rtx_REG (SImode, LR_REGNUM);
25135 emit_move_insn (work_reg, x);
25136
25137 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25138 x = gen_frame_mem (SImode, x);
25139 emit_move_insn (x, work_reg);
25140
25141 x = GEN_INT (offset + 12);
25142 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25143
25144 emit_move_insn (arm_hfp_rtx, work_reg);
25145 }
25146 /* Optimization: If we are not pushing any low registers but we are going
25147 to push some high registers then delay our first push. This will just
25148 be a push of LR and we can combine it with the push of the first high
25149 register. */
25150 else if ((l_mask & 0xff) != 0
25151 || (high_regs_pushed == 0 && lr_needs_saving))
25152 {
25153 unsigned long mask = l_mask;
25154 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
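/* thumb1_extra_regs_pushed says how many additional low registers may be
   pushed purely to allocate stack space; (1 << N) - 1 adds r0..r(N-1) to
   the push, and the explicit stack adjustment later shrinks to match.  */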
25155 insn = thumb1_emit_multi_reg_push (mask, mask);
25156 RTX_FRAME_RELATED_P (insn) = 1;
25157 lr_needs_saving = false;
25158 }
25159
25160 if (high_regs_pushed)
25161 {
25162 unsigned pushable_regs;
25163 unsigned next_hi_reg;
25164 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25165 : crtl->args.info.nregs;
25166 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
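/* (1 << arg_regs_num) - 1 is a mask of the core registers that carry
   incoming arguments, i.e. r0 up to but not including r<arg_regs_num>.  */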
25167
25168 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25169 if (live_regs_mask & (1 << next_hi_reg))
25170 break;
25171
25172 /* Here we need to mask out registers used for passing arguments, even
25173 if they could otherwise be pushed: using them to stash the high
25174 registers could clobber argument values that are still live. */
25175 pushable_regs = l_mask & (~arg_regs_mask);
25176 if (lr_needs_saving)
25177 pushable_regs &= ~(1 << LR_REGNUM);
25178
25179 if (pushable_regs == 0)
25180 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25181
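/* Each iteration of the loop below copies as many high registers as will
   fit into the available low registers in PUSHABLE_REGS and pushes them
   with a single PUSH; REAL_REGS_MASK records which high registers the
   pushed values actually belong to, so the frame-related notes describe
   the right registers.  */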
25182 while (high_regs_pushed > 0)
25183 {
25184 unsigned long real_regs_mask = 0;
25185 unsigned long push_mask = 0;
25186
25187 for (regno = LR_REGNUM; regno >= 0; regno --)
25188 {
25189 if (pushable_regs & (1 << regno))
25190 {
25191 emit_move_insn (gen_rtx_REG (SImode, regno),
25192 gen_rtx_REG (SImode, next_hi_reg));
25193
25194 high_regs_pushed --;
25195 real_regs_mask |= (1 << next_hi_reg);
25196 push_mask |= (1 << regno);
25197
25198 if (high_regs_pushed)
25199 {
25200 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25201 next_hi_reg --)
25202 if (live_regs_mask & (1 << next_hi_reg))
25203 break;
25204 }
25205 else
25206 break;
25207 }
25208 }
25209
25210 /* If we had to find a work register and we have not yet
25211 saved the LR then add it to the list of regs to push. */
25212 if (lr_needs_saving)
25213 {
25214 push_mask |= 1 << LR_REGNUM;
25215 real_regs_mask |= 1 << LR_REGNUM;
25216 lr_needs_saving = false;
25217 }
25218
25219 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25220 RTX_FRAME_RELATED_P (insn) = 1;
25221 }
25222 }
25223
25224 /* Load the pic register before setting the frame pointer,
25225 so we can use r7 as a temporary work register. */
25226 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25227 arm_load_pic_register (live_regs_mask);
25228
25229 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25230 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25231 stack_pointer_rtx);
25232
25233 size = offsets->outgoing_args - offsets->saved_args;
25234 if (flag_stack_usage_info)
25235 current_function_static_stack_size = size;
25236
25237 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25238 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25239 || flag_stack_clash_protection)
25240 && size)
25241 sorry ("-fstack-check=specific for Thumb-1");
25242
25243 amount = offsets->outgoing_args - offsets->saved_regs;
25244 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25245 if (amount)
25246 {
25247 if (amount < 512)
25248 {
25249 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25250 GEN_INT (- amount)));
25251 RTX_FRAME_RELATED_P (insn) = 1;
25252 }
25253 else
25254 {
25255 rtx reg, dwarf;
25256
25257 /* The stack decrement is too big for an immediate value in a single
25258 insn. In theory we could issue multiple subtracts, but after
25259 three of them it becomes more space efficient to place the full
25260 value in the constant pool and load into a register. (Also the
25261 ARM debugger really likes to see only one stack decrement per
25262 function). So instead we look for a scratch register into which
25263 we can load the decrement, and then we subtract this from the
25264 stack pointer. Unfortunately on the thumb the only available
25265 scratch registers are the argument registers, and we cannot use
25266 these as they may hold arguments to the function. Instead we
25267 attempt to locate a call preserved register which is used by this
25268 function. If we can find one, then we know that it will have
25269 been pushed at the start of the prologue and so we can corrupt
25270 it now. */
25271 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25272 if (live_regs_mask & (1 << regno))
25273 break;
25274
25275 gcc_assert (regno <= LAST_LO_REGNUM);
25276
25277 reg = gen_rtx_REG (SImode, regno);
25278
25279 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25280
25281 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25282 stack_pointer_rtx, reg));
25283
25284 dwarf = gen_rtx_SET (stack_pointer_rtx,
25285 plus_constant (Pmode, stack_pointer_rtx,
25286 -amount));
25287 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25288 RTX_FRAME_RELATED_P (insn) = 1;
25289 }
25290 }
25291
25292 if (frame_pointer_needed)
25293 thumb_set_frame_pointer (offsets);
25294
25295 /* If we are profiling, make sure no instructions are scheduled before
25296 the call to mcount. Similarly if the user has requested no
25297 scheduling in the prolog. Similarly if we want non-call exceptions
25298 using the EABI unwinder, to prevent faulting instructions from being
25299 swapped with a stack adjustment. */
25300 if (crtl->profile || !TARGET_SCHED_PROLOG
25301 || (arm_except_unwind_info (&global_options) == UI_TARGET
25302 && cfun->can_throw_non_call_exceptions))
25303 emit_insn (gen_blockage ());
25304
25305 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25306 if (live_regs_mask & 0xff)
25307 cfun->machine->lr_save_eliminated = 0;
25308 }
25309
25310 /* Clear caller-saved registers that are not used to pass return values, and
25311 any leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25312
25313 void
25314 cmse_nonsecure_entry_clear_before_return (void)
25315 {
25316 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25317 uint32_t padding_bits_to_clear = 0;
25318 auto_sbitmap to_clear_bitmap (maxregno + 1);
25319 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25320 tree result_type;
25321
25322 bitmap_clear (to_clear_bitmap);
25323 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25324 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25325
25326 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25327 registers. */
25328 if (TARGET_HARD_FLOAT)
25329 {
25330 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25331
25332 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25333
25334 /* Make sure we don't clear the two scratch registers used to clear the
25335 relevant FPSCR bits in output_return_instruction. */
25336 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25337 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25338 emit_use (gen_rtx_REG (SImode, 4));
25339 bitmap_clear_bit (to_clear_bitmap, 4);
25340 }
25341
25342 /* If the user has defined registers to be caller saved, these are no longer
25343 restored by the function before returning and must thus be cleared for
25344 security purposes. */
25345 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25346 {
25347 /* We do not touch registers that can be used to pass arguments as per
25348 the AAPCS, since these should never be made callee-saved by user
25349 options. */
25350 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25351 continue;
25352 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25353 continue;
25354 if (call_used_regs[regno])
25355 bitmap_set_bit (to_clear_bitmap, regno);
25356 }
25357
25358 /* Make sure we do not clear the registers used to return the result in. */
25359 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25360 if (!VOID_TYPE_P (result_type))
25361 {
25362 uint64_t to_clear_return_mask;
25363 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25364
25365 /* No need to check that we return in registers, because we don't
25366 support returning on stack yet. */
25367 gcc_assert (REG_P (result_rtl));
25368 to_clear_return_mask
25369 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25370 &padding_bits_to_clear);
25371 if (to_clear_return_mask)
25372 {
25373 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
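/* to_clear_return_mask is only 64 bits wide, so every register number
   shifted below must be in range.  */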
25374 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25375 {
25376 if (to_clear_return_mask & (1ULL << regno))
25377 bitmap_clear_bit (to_clear_bitmap, regno);
25378 }
25379 }
25380 }
25381
25382 if (padding_bits_to_clear != 0)
25383 {
25384 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25385 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25386
25387 /* Padding_bits_to_clear is not 0, so we know we are dealing with
25388 returning a composite type, which only uses r0. Let's make sure that
25389 r1-r3 are cleared too. */
25390 bitmap_clear (to_clear_arg_regs_bitmap);
25391 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25392 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25393 }
25394
25395 /* Clear full registers that leak before returning. */
25396 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25397 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25398 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25399 clearing_reg);
25400 }
25401
25402 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25403 single POP instruction can be generated. LR should be replaced by PC.
25404 All the checks required are already done by USE_RETURN_INSN (). Hence,
25405 all we really need to check here is whether a single register or
25406 multiple registers are to be popped on return. */
25407 void
25408 thumb2_expand_return (bool simple_return)
25409 {
25410 int i, num_regs;
25411 unsigned long saved_regs_mask;
25412 arm_stack_offsets *offsets;
25413
25414 offsets = arm_get_frame_offsets ();
25415 saved_regs_mask = offsets->saved_regs_mask;
25416
25417 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25418 if (saved_regs_mask & (1 << i))
25419 num_regs++;
25420
25421 if (!simple_return && saved_regs_mask)
25422 {
25423 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25424 functions or adapt code to handle according to ACLE. This path should
25425 not be reachable for cmse_nonsecure_entry functions though we prefer
25426 to assert it for now to ensure that future code changes do not silently
25427 change this behavior. */
25428 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25429 if (num_regs == 1)
25430 {
25431 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25432 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25433 rtx addr = gen_rtx_MEM (SImode,
25434 gen_rtx_POST_INC (SImode,
25435 stack_pointer_rtx));
25436 set_mem_alias_set (addr, get_frame_alias_set ());
25437 XVECEXP (par, 0, 0) = ret_rtx;
25438 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25439 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25440 emit_jump_insn (par);
25441 }
25442 else
25443 {
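/* Replace LR with PC in the pop mask: loading the saved return address
   directly into PC performs the return as part of the pop.  */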
25444 saved_regs_mask &= ~ (1 << LR_REGNUM);
25445 saved_regs_mask |= (1 << PC_REGNUM);
25446 arm_emit_multi_reg_pop (saved_regs_mask);
25447 }
25448 }
25449 else
25450 {
25451 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25452 cmse_nonsecure_entry_clear_before_return ();
25453 emit_jump_insn (simple_return_rtx);
25454 }
25455 }
25456
25457 void
25458 thumb1_expand_epilogue (void)
25459 {
25460 HOST_WIDE_INT amount;
25461 arm_stack_offsets *offsets;
25462 int regno;
25463
25464 /* Naked functions don't have prologues. */
25465 if (IS_NAKED (arm_current_func_type ()))
25466 return;
25467
25468 offsets = arm_get_frame_offsets ();
25469 amount = offsets->outgoing_args - offsets->saved_regs;
25470
25471 if (frame_pointer_needed)
25472 {
25473 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25474 amount = offsets->locals_base - offsets->saved_regs;
25475 }
25476 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25477
25478 gcc_assert (amount >= 0);
25479 if (amount)
25480 {
25481 emit_insn (gen_blockage ());
25482
25483 if (amount < 512)
25484 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25485 GEN_INT (amount)));
25486 else
25487 {
25488 /* r3 is always free in the epilogue. */
25489 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25490
25491 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25492 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25493 }
25494 }
25495
25496 /* Emit a USE (stack_pointer_rtx), so that
25497 the stack adjustment will not be deleted. */
25498 emit_insn (gen_force_register_use (stack_pointer_rtx));
25499
25500 if (crtl->profile || !TARGET_SCHED_PROLOG)
25501 emit_insn (gen_blockage ());
25502
25503 /* Emit a clobber for each register that will be restored in the epilogue,
25504 so that flow2 will get register lifetimes correct. */
25505 for (regno = 0; regno < 13; regno++)
25506 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25507 emit_clobber (gen_rtx_REG (SImode, regno));
25508
25509 if (! df_regs_ever_live_p (LR_REGNUM))
25510 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25511
25512 /* Clear all caller-saved regs that are not used to return. */
25513 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25514 cmse_nonsecure_entry_clear_before_return ();
25515 }
25516
25517 /* Epilogue code for APCS frame. */
25518 static void
25519 arm_expand_epilogue_apcs_frame (bool really_return)
25520 {
25521 unsigned long func_type;
25522 unsigned long saved_regs_mask;
25523 int num_regs = 0;
25524 int i;
25525 int floats_from_frame = 0;
25526 arm_stack_offsets *offsets;
25527
25528 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25529 func_type = arm_current_func_type ();
25530
25531 /* Get frame offsets for ARM. */
25532 offsets = arm_get_frame_offsets ();
25533 saved_regs_mask = offsets->saved_regs_mask;
25534
25535 /* Find the offset of the floating-point save area in the frame. */
25536 floats_from_frame
25537 = (offsets->saved_args
25538 + arm_compute_static_chain_stack_bytes ()
25539 - offsets->frame);
25540
25541 /* Compute how many core registers are saved and how far away the floats are. */
25542 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25543 if (saved_regs_mask & (1 << i))
25544 {
25545 num_regs++;
25546 floats_from_frame += 4;
25547 }
25548
25549 if (TARGET_HARD_FLOAT)
25550 {
25551 int start_reg;
25552 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25553
25554 /* The offset is from IP_REGNUM. */
25555 int saved_size = arm_get_vfp_saved_size ();
25556 if (saved_size > 0)
25557 {
25558 rtx_insn *insn;
25559 floats_from_frame += saved_size;
25560 insn = emit_insn (gen_addsi3 (ip_rtx,
25561 hard_frame_pointer_rtx,
25562 GEN_INT (-floats_from_frame)));
25563 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25564 ip_rtx, hard_frame_pointer_rtx);
25565 }
25566
25567 /* Generate VFP register multi-pop. */
25568 start_reg = FIRST_VFP_REGNUM;
25569
25570 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25571 /* Look for a case where a reg does not need restoring. */
25572 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25573 && (!df_regs_ever_live_p (i + 1)
25574 || call_used_regs[i + 1]))
25575 {
25576 if (start_reg != i)
25577 arm_emit_vfp_multi_reg_pop (start_reg,
25578 (i - start_reg) / 2,
25579 gen_rtx_REG (SImode,
25580 IP_REGNUM));
25581 start_reg = i + 2;
25582 }
25583
25584 /* Restore the remaining regs that we have discovered (or possibly
25585 even all of them, if the conditional in the for loop never
25586 fired). */
25587 if (start_reg != i)
25588 arm_emit_vfp_multi_reg_pop (start_reg,
25589 (i - start_reg) / 2,
25590 gen_rtx_REG (SImode, IP_REGNUM));
25591 }
25592
25593 if (TARGET_IWMMXT)
25594 {
25595 /* The frame pointer is guaranteed to be non-double-word aligned, as
25596 it is set to double-word-aligned old_stack_pointer - 4. */
25597 rtx_insn *insn;
25598 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25599
25600 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25601 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25602 {
25603 rtx addr = gen_frame_mem (V2SImode,
25604 plus_constant (Pmode, hard_frame_pointer_rtx,
25605 - lrm_count * 4));
25606 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25607 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25608 gen_rtx_REG (V2SImode, i),
25609 NULL_RTX);
25610 lrm_count += 2;
25611 }
25612 }
25613
25614 /* saved_regs_mask should contain IP, which holds the old stack pointer
25615 from the time the activation record was created. Since SP and IP are
25616 adjacent registers, we can restore the value directly into SP. */
25617 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25618 saved_regs_mask &= ~(1 << IP_REGNUM);
25619 saved_regs_mask |= (1 << SP_REGNUM);
25620
25621 /* There are two registers left in saved_regs_mask - LR and PC. We
25622 only need to restore LR (the return address), but to
25623 save time we can load it directly into PC, unless we need a
25624 special function exit sequence, or we are not really returning. */
25625 if (really_return
25626 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25627 && !crtl->calls_eh_return)
25628 /* Delete LR from the register mask, so that LR on
25629 the stack is loaded into the PC in the register mask. */
25630 saved_regs_mask &= ~(1 << LR_REGNUM);
25631 else
25632 saved_regs_mask &= ~(1 << PC_REGNUM);
25633
25634 num_regs = bit_count (saved_regs_mask);
25635 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25636 {
25637 rtx_insn *insn;
25638 emit_insn (gen_blockage ());
25639 /* Unwind the stack to just below the saved registers. */
25640 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25641 hard_frame_pointer_rtx,
25642 GEN_INT (- 4 * num_regs)));
25643
25644 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25645 stack_pointer_rtx, hard_frame_pointer_rtx);
25646 }
25647
25648 arm_emit_multi_reg_pop (saved_regs_mask);
25649
25650 if (IS_INTERRUPT (func_type))
25651 {
25652 /* Interrupt handlers will have pushed the
25653 IP onto the stack, so restore it now. */
25654 rtx_insn *insn;
25655 rtx addr = gen_rtx_MEM (SImode,
25656 gen_rtx_POST_INC (SImode,
25657 stack_pointer_rtx));
25658 set_mem_alias_set (addr, get_frame_alias_set ());
25659 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25660 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25661 gen_rtx_REG (SImode, IP_REGNUM),
25662 NULL_RTX);
25663 }
25664
25665 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25666 return;
25667
25668 if (crtl->calls_eh_return)
25669 emit_insn (gen_addsi3 (stack_pointer_rtx,
25670 stack_pointer_rtx,
25671 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25672
25673 if (IS_STACKALIGN (func_type))
25674 /* Restore the original stack pointer. Before prologue, the stack was
25675 realigned and the original stack pointer saved in r0. For details,
25676 see comment in arm_expand_prologue. */
25677 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25678
25679 emit_jump_insn (simple_return_rtx);
25680 }
25681
25682 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25683 function is not a sibcall. */
25684 void
25685 arm_expand_epilogue (bool really_return)
25686 {
25687 unsigned long func_type;
25688 unsigned long saved_regs_mask;
25689 int num_regs = 0;
25690 int i;
25691 int amount;
25692 arm_stack_offsets *offsets;
25693
25694 func_type = arm_current_func_type ();
25695
25696 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25697 and let output_return_instruction take care of any instruction emission. */
25698 if (IS_NAKED (func_type)
25699 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25700 {
25701 if (really_return)
25702 emit_jump_insn (simple_return_rtx);
25703 return;
25704 }
25705
25706 /* If we are throwing an exception, then we really must be doing a
25707 return, so we can't tail-call. */
25708 gcc_assert (!crtl->calls_eh_return || really_return);
25709
25710 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25711 {
25712 arm_expand_epilogue_apcs_frame (really_return);
25713 return;
25714 }
25715
25716 /* Get frame offsets for ARM. */
25717 offsets = arm_get_frame_offsets ();
25718 saved_regs_mask = offsets->saved_regs_mask;
25719 num_regs = bit_count (saved_regs_mask);
25720
25721 if (frame_pointer_needed)
25722 {
25723 rtx_insn *insn;
25724 /* Restore stack pointer if necessary. */
25725 if (TARGET_ARM)
25726 {
25727 /* In ARM mode, the frame pointer points to the first saved register.
25728 Restore the stack pointer to the last saved register. */
25729 amount = offsets->frame - offsets->saved_regs;
25730
25731 /* Force out any pending memory operations that reference stacked data
25732 before stack de-allocation occurs. */
25733 emit_insn (gen_blockage ());
25734 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25735 hard_frame_pointer_rtx,
25736 GEN_INT (amount)));
25737 arm_add_cfa_adjust_cfa_note (insn, amount,
25738 stack_pointer_rtx,
25739 hard_frame_pointer_rtx);
25740
25741 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25742 deleted. */
25743 emit_insn (gen_force_register_use (stack_pointer_rtx));
25744 }
25745 else
25746 {
25747 /* In Thumb-2 mode, the frame pointer points to the last saved
25748 register. */
25749 amount = offsets->locals_base - offsets->saved_regs;
25750 if (amount)
25751 {
25752 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25753 hard_frame_pointer_rtx,
25754 GEN_INT (amount)));
25755 arm_add_cfa_adjust_cfa_note (insn, amount,
25756 hard_frame_pointer_rtx,
25757 hard_frame_pointer_rtx);
25758 }
25759
25760 /* Force out any pending memory operations that reference stacked data
25761 before stack de-allocation occurs. */
25762 emit_insn (gen_blockage ());
25763 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25764 hard_frame_pointer_rtx));
25765 arm_add_cfa_adjust_cfa_note (insn, 0,
25766 stack_pointer_rtx,
25767 hard_frame_pointer_rtx);
25768 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25769 deleted. */
25770 emit_insn (gen_force_register_use (stack_pointer_rtx));
25771 }
25772 }
25773 else
25774 {
25775 /* Pop off outgoing args and local frame to adjust stack pointer to
25776 last saved register. */
25777 amount = offsets->outgoing_args - offsets->saved_regs;
25778 if (amount)
25779 {
25780 rtx_insn *tmp;
25781 /* Force out any pending memory operations that reference stacked data
25782 before stack de-allocation occurs. */
25783 emit_insn (gen_blockage ());
25784 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25785 stack_pointer_rtx,
25786 GEN_INT (amount)));
25787 arm_add_cfa_adjust_cfa_note (tmp, amount,
25788 stack_pointer_rtx, stack_pointer_rtx);
25789 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25790 not deleted. */
25791 emit_insn (gen_force_register_use (stack_pointer_rtx));
25792 }
25793 }
25794
25795 if (TARGET_HARD_FLOAT)
25796 {
25797 /* Generate VFP register multi-pop. */
25798 int end_reg = LAST_VFP_REGNUM + 1;
25799
25800 /* Scan the registers in reverse order. We need to match
25801 any groupings made in the prologue and generate matching
25802 vldm operations. The need to match groups is because,
25803 unlike pop, vldm can only do consecutive regs. */
25804 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25805 /* Look for a case where a reg does not need restoring. */
25806 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25807 && (!df_regs_ever_live_p (i + 1)
25808 || call_used_regs[i + 1]))
25809 {
25810 /* Restore the regs discovered so far (from reg+2 to
25811 end_reg). */
25812 if (end_reg > i + 2)
25813 arm_emit_vfp_multi_reg_pop (i + 2,
25814 (end_reg - (i + 2)) / 2,
25815 stack_pointer_rtx);
25816 end_reg = i;
25817 }
25818
25819 /* Restore the remaining regs that we have discovered (or possibly
25820 even all of them, if the conditional in the for loop never
25821 fired). */
25822 if (end_reg > i + 2)
25823 arm_emit_vfp_multi_reg_pop (i + 2,
25824 (end_reg - (i + 2)) / 2,
25825 stack_pointer_rtx);
25826 }
25827
25828 if (TARGET_IWMMXT)
25829 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25830 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25831 {
25832 rtx_insn *insn;
25833 rtx addr = gen_rtx_MEM (V2SImode,
25834 gen_rtx_POST_INC (SImode,
25835 stack_pointer_rtx));
25836 set_mem_alias_set (addr, get_frame_alias_set ());
25837 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25838 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25839 gen_rtx_REG (V2SImode, i),
25840 NULL_RTX);
25841 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25842 stack_pointer_rtx, stack_pointer_rtx);
25843 }
25844
25845 if (saved_regs_mask)
25846 {
25847 rtx insn;
25848 bool return_in_pc = false;
25849
25850 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25851 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25852 && !IS_CMSE_ENTRY (func_type)
25853 && !IS_STACKALIGN (func_type)
25854 && really_return
25855 && crtl->args.pretend_args_size == 0
25856 && saved_regs_mask & (1 << LR_REGNUM)
25857 && !crtl->calls_eh_return)
25858 {
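/* Pop the saved return address directly into PC instead of restoring LR
   and returning separately.  */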
25859 saved_regs_mask &= ~(1 << LR_REGNUM);
25860 saved_regs_mask |= (1 << PC_REGNUM);
25861 return_in_pc = true;
25862 }
25863
25864 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25865 {
25866 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25867 if (saved_regs_mask & (1 << i))
25868 {
25869 rtx addr = gen_rtx_MEM (SImode,
25870 gen_rtx_POST_INC (SImode,
25871 stack_pointer_rtx));
25872 set_mem_alias_set (addr, get_frame_alias_set ());
25873
25874 if (i == PC_REGNUM)
25875 {
25876 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25877 XVECEXP (insn, 0, 0) = ret_rtx;
25878 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25879 addr);
25880 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25881 insn = emit_jump_insn (insn);
25882 }
25883 else
25884 {
25885 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25886 addr));
25887 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25888 gen_rtx_REG (SImode, i),
25889 NULL_RTX);
25890 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25891 stack_pointer_rtx,
25892 stack_pointer_rtx);
25893 }
25894 }
25895 }
25896 else
25897 {
25898 if (TARGET_LDRD
25899 && current_tune->prefer_ldrd_strd
25900 && !optimize_function_for_size_p (cfun))
25901 {
25902 if (TARGET_THUMB2)
25903 thumb2_emit_ldrd_pop (saved_regs_mask);
25904 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25905 arm_emit_ldrd_pop (saved_regs_mask);
25906 else
25907 arm_emit_multi_reg_pop (saved_regs_mask);
25908 }
25909 else
25910 arm_emit_multi_reg_pop (saved_regs_mask);
25911 }
25912
25913 if (return_in_pc)
25914 return;
25915 }
25916
25917 amount
25918 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25919 if (amount)
25920 {
25921 int i, j;
25922 rtx dwarf = NULL_RTX;
25923 rtx_insn *tmp =
25924 emit_insn (gen_addsi3 (stack_pointer_rtx,
25925 stack_pointer_rtx,
25926 GEN_INT (amount)));
25927
25928 RTX_FRAME_RELATED_P (tmp) = 1;
25929
25930 if (cfun->machine->uses_anonymous_args)
25931 {
25932 /* Restore pretend args. Refer to arm_expand_prologue for how the
25933 pretend_args are saved on the stack. */
25934 int num_regs = crtl->args.pretend_args_size / 4;
25935 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
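/* (0xf0 >> num_regs) & 0xf selects the last NUM_REGS argument registers,
   e.g. r2 and r3 when two words of pretend args were pushed, matching the
   registers the prologue saved.  */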
25936 for (j = 0, i = 0; j < num_regs; i++)
25937 if (saved_regs_mask & (1 << i))
25938 {
25939 rtx reg = gen_rtx_REG (SImode, i);
25940 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25941 j++;
25942 }
25943 REG_NOTES (tmp) = dwarf;
25944 }
25945 arm_add_cfa_adjust_cfa_note (tmp, amount,
25946 stack_pointer_rtx, stack_pointer_rtx);
25947 }
25948
25949 /* Clear all caller-saved regs that are not used to return. */
25950 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25951 {
25952 /* CMSE_ENTRY always returns. */
25953 gcc_assert (really_return);
25954 cmse_nonsecure_entry_clear_before_return ();
25955 }
25956
25957 if (!really_return)
25958 return;
25959
25960 if (crtl->calls_eh_return)
25961 emit_insn (gen_addsi3 (stack_pointer_rtx,
25962 stack_pointer_rtx,
25963 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25964
25965 if (IS_STACKALIGN (func_type))
25966 /* Restore the original stack pointer. Before prologue, the stack was
25967 realigned and the original stack pointer saved in r0. For details,
25968 see comment in arm_expand_prologue. */
25969 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25970
25971 emit_jump_insn (simple_return_rtx);
25972 }
25973
25974 /* Implementation of insn prologue_thumb1_interwork. This is the first
25975 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25976
25977 const char *
25978 thumb1_output_interwork (void)
25979 {
25980 const char * name;
25981 FILE *f = asm_out_file;
25982
25983 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25984 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25985 == SYMBOL_REF);
25986 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25987
25988 /* Generate code sequence to switch us into Thumb mode. */
25989 /* The .code 32 directive has already been emitted by
25990 ASM_DECLARE_FUNCTION_NAME. */
25991 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25992 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
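/* In ARM state the PC reads as the address of the current instruction
   plus 8, i.e. the first Thumb instruction after the BX; ORRing in 1 sets
   the Thumb bit so that the BX switches state when it branches there.  */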
25993
25994 /* Generate a label, so that the debugger will notice the
25995 change in instruction sets. This label is also used by
25996 the assembler to bypass the ARM code when this function
25997 is called from a Thumb encoded function elsewhere in the
25998 same file. Hence the definition of STUB_NAME here must
25999 agree with the definition in gas/config/tc-arm.c. */
26000
26001 #define STUB_NAME ".real_start_of"
26002
26003 fprintf (f, "\t.code\t16\n");
26004 #ifdef ARM_PE
26005 if (arm_dllexport_name_p (name))
26006 name = arm_strip_name_encoding (name);
26007 #endif
26008 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26009 fprintf (f, "\t.thumb_func\n");
26010 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26011
26012 return "";
26013 }
26014
26015 /* Handle the case of a double word load into a low register from
26016 a computed memory address. The computed address may involve a
26017 register which is overwritten by the load. */
26018 const char *
26019 thumb_load_double_from_address (rtx *operands)
26020 {
26021 rtx addr;
26022 rtx base;
26023 rtx offset;
26024 rtx arg1;
26025 rtx arg2;
26026
26027 gcc_assert (REG_P (operands[0]));
26028 gcc_assert (MEM_P (operands[1]));
26029
26030 /* Get the memory address. */
26031 addr = XEXP (operands[1], 0);
26032
26033 /* Work out how the memory address is computed. */
26034 switch (GET_CODE (addr))
26035 {
26036 case REG:
26037 operands[2] = adjust_address (operands[1], SImode, 4);
26038
26039 if (REGNO (operands[0]) == REGNO (addr))
26040 {
26041 output_asm_insn ("ldr\t%H0, %2", operands);
26042 output_asm_insn ("ldr\t%0, %1", operands);
26043 }
26044 else
26045 {
26046 output_asm_insn ("ldr\t%0, %1", operands);
26047 output_asm_insn ("ldr\t%H0, %2", operands);
26048 }
26049 break;
26050
26051 case CONST:
26052 /* Compute <address> + 4 for the high order load. */
26053 operands[2] = adjust_address (operands[1], SImode, 4);
26054
26055 output_asm_insn ("ldr\t%0, %1", operands);
26056 output_asm_insn ("ldr\t%H0, %2", operands);
26057 break;
26058
26059 case PLUS:
26060 arg1 = XEXP (addr, 0);
26061 arg2 = XEXP (addr, 1);
26062
26063 if (CONSTANT_P (arg1))
26064 base = arg2, offset = arg1;
26065 else
26066 base = arg1, offset = arg2;
26067
26068 gcc_assert (REG_P (base));
26069
26070 /* Catch the case of <address> = <reg> + <reg> */
26071 if (REG_P (offset))
26072 {
26073 int reg_offset = REGNO (offset);
26074 int reg_base = REGNO (base);
26075 int reg_dest = REGNO (operands[0]);
26076
26077 /* Add the base and offset registers together into the
26078 higher destination register. */
26079 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26080 reg_dest + 1, reg_base, reg_offset);
26081
26082 /* Load the lower destination register from the address in
26083 the higher destination register. */
26084 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26085 reg_dest, reg_dest + 1);
26086
26087 /* Load the higher destination register from its own address
26088 plus 4. */
26089 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26090 reg_dest + 1, reg_dest + 1);
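/* Building the address in the upper half of the destination means the
   first load (into the lower half) cannot clobber it, and the second load
   may safely overwrite it because the address is no longer needed.  */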
26091 }
26092 else
26093 {
26094 /* Compute <address> + 4 for the high order load. */
26095 operands[2] = adjust_address (operands[1], SImode, 4);
26096
26097 /* If the computed address is held in the low order register
26098 then load the high order register first, otherwise always
26099 load the low order register first. */
26100 if (REGNO (operands[0]) == REGNO (base))
26101 {
26102 output_asm_insn ("ldr\t%H0, %2", operands);
26103 output_asm_insn ("ldr\t%0, %1", operands);
26104 }
26105 else
26106 {
26107 output_asm_insn ("ldr\t%0, %1", operands);
26108 output_asm_insn ("ldr\t%H0, %2", operands);
26109 }
26110 }
26111 break;
26112
26113 case LABEL_REF:
26114 /* With no registers to worry about we can just load the value
26115 directly. */
26116 operands[2] = adjust_address (operands[1], SImode, 4);
26117
26118 output_asm_insn ("ldr\t%H0, %2", operands);
26119 output_asm_insn ("ldr\t%0, %1", operands);
26120 break;
26121
26122 default:
26123 gcc_unreachable ();
26124 }
26125
26126 return "";
26127 }
26128
26129 const char *
26130 thumb_output_move_mem_multiple (int n, rtx *operands)
26131 {
26132 switch (n)
26133 {
26134 case 2:
26135 if (REGNO (operands[4]) > REGNO (operands[5]))
26136 std::swap (operands[4], operands[5]);
26137
26138 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26139 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26140 break;
26141
26142 case 3:
26143 if (REGNO (operands[4]) > REGNO (operands[5]))
26144 std::swap (operands[4], operands[5]);
26145 if (REGNO (operands[5]) > REGNO (operands[6]))
26146 std::swap (operands[5], operands[6]);
26147 if (REGNO (operands[4]) > REGNO (operands[5]))
26148 std::swap (operands[4], operands[5]);
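/* The three exchanges above sort the scratch registers into ascending
   order, so that the emitted LDM/STM register lists are ascending as the
   assembler expects.  */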
26149
26150 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26151 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26152 break;
26153
26154 default:
26155 gcc_unreachable ();
26156 }
26157
26158 return "";
26159 }
26160
26161 /* Output a call-via instruction for thumb state. */
26162 const char *
26163 thumb_call_via_reg (rtx reg)
26164 {
26165 int regno = REGNO (reg);
26166 rtx *labelp;
26167
26168 gcc_assert (regno < LR_REGNUM);
26169
26170 /* If we are in the normal text section we can use a single instance
26171 per compilation unit. If we are doing function sections, then we need
26172 an entry per section, since we can't rely on reachability. */
26173 if (in_section == text_section)
26174 {
26175 thumb_call_reg_needed = 1;
26176
26177 if (thumb_call_via_label[regno] == NULL)
26178 thumb_call_via_label[regno] = gen_label_rtx ();
26179 labelp = thumb_call_via_label + regno;
26180 }
26181 else
26182 {
26183 if (cfun->machine->call_via[regno] == NULL)
26184 cfun->machine->call_via[regno] = gen_label_rtx ();
26185 labelp = cfun->machine->call_via + regno;
26186 }
26187
26188 output_asm_insn ("bl\t%a0", labelp);
26189 return "";
26190 }
26191
26192 /* Routines for generating rtl. */
26193 void
26194 thumb_expand_movmemqi (rtx *operands)
26195 {
26196 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26197 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26198 HOST_WIDE_INT len = INTVAL (operands[2]);
26199 HOST_WIDE_INT offset = 0;
26200
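/* Copy the bulk of the block with 12-byte and 8-byte multi-register
   moves, then finish off with word, halfword and byte copies.  */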
26201 while (len >= 12)
26202 {
26203 emit_insn (gen_movmem12b (out, in, out, in));
26204 len -= 12;
26205 }
26206
26207 if (len >= 8)
26208 {
26209 emit_insn (gen_movmem8b (out, in, out, in));
26210 len -= 8;
26211 }
26212
26213 if (len >= 4)
26214 {
26215 rtx reg = gen_reg_rtx (SImode);
26216 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26217 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26218 len -= 4;
26219 offset += 4;
26220 }
26221
26222 if (len >= 2)
26223 {
26224 rtx reg = gen_reg_rtx (HImode);
26225 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26226 plus_constant (Pmode, in,
26227 offset))));
26228 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26229 offset)),
26230 reg));
26231 len -= 2;
26232 offset += 2;
26233 }
26234
26235 if (len)
26236 {
26237 rtx reg = gen_reg_rtx (QImode);
26238 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26239 plus_constant (Pmode, in,
26240 offset))));
26241 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26242 offset)),
26243 reg));
26244 }
26245 }
26246
26247 void
26248 thumb_reload_out_hi (rtx *operands)
26249 {
26250 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26251 }
26252
26253 /* Return the length of a function name prefix
26254 that starts with the character 'c'. */
26255 static int
26256 arm_get_strip_length (int c)
26257 {
26258 switch (c)
26259 {
26260 ARM_NAME_ENCODING_LENGTHS
26261 default: return 0;
26262 }
26263 }
26264
26265 /* Return a pointer to a function's name with any
26266 and all prefix encodings stripped from it. */
26267 const char *
26268 arm_strip_name_encoding (const char *name)
26269 {
26270 int skip;
26271
26272 while ((skip = arm_get_strip_length (* name)))
26273 name += skip;
26274
26275 return name;
26276 }
26277
26278 /* If there is a '*' anywhere in the name's prefix, then
26279 emit the stripped name verbatim, otherwise prepend an
26280 underscore if leading underscores are being used. */
26281 void
26282 arm_asm_output_labelref (FILE *stream, const char *name)
26283 {
26284 int skip;
26285 int verbatim = 0;
26286
26287 while ((skip = arm_get_strip_length (* name)))
26288 {
26289 verbatim |= (*name == '*');
26290 name += skip;
26291 }
26292
26293 if (verbatim)
26294 fputs (name, stream);
26295 else
26296 asm_fprintf (stream, "%U%s", name);
26297 }
26298
26299 /* This function is used to emit an EABI tag and its associated value.
26300 We emit the numerical value of the tag in case the assembler does not
26301 support textual tags (e.g. gas prior to 2.20). If requested we include
26302 the tag name in a comment so that anyone reading the assembler output
26303 will know which tag is being set.
26304
26305 This function is not static because arm-c.c needs it too. */
26306
26307 void
26308 arm_emit_eabi_attribute (const char *name, int num, int val)
26309 {
26310 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26311 if (flag_verbose_asm || flag_debug_asm)
26312 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26313 asm_fprintf (asm_out_file, "\n");
26314 }
26315
26316 /* This function is used to print CPU tuning information as a comment
26317 in the assembler file. Pointers are not printed for now. */
26318
26319 void
26320 arm_print_tune_info (void)
26321 {
26322 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26323 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26324 current_tune->constant_limit);
26325 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26326 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26328 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26329 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26330 "prefetch.l1_cache_size:\t%d\n",
26331 current_tune->prefetch.l1_cache_size);
26332 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26333 "prefetch.l1_cache_line_size:\t%d\n",
26334 current_tune->prefetch.l1_cache_line_size);
26335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26336 "prefer_constant_pool:\t%d\n",
26337 (int) current_tune->prefer_constant_pool);
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26339 "branch_cost:\t(s:speed, p:predictable)\n");
26340 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26342 current_tune->branch_cost (false, false));
26343 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26344 current_tune->branch_cost (false, true));
26345 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26346 current_tune->branch_cost (true, false));
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26348 current_tune->branch_cost (true, true));
26349 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26350 "prefer_ldrd_strd:\t%d\n",
26351 (int) current_tune->prefer_ldrd_strd);
26352 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26353 "logical_op_non_short_circuit:\t[%d,%d]\n",
26354 (int) current_tune->logical_op_non_short_circuit_thumb,
26355 (int) current_tune->logical_op_non_short_circuit_arm);
26356 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26357 "prefer_neon_for_64bits:\t%d\n",
26358 (int) current_tune->prefer_neon_for_64bits);
26359 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26360 "disparage_flag_setting_t16_encodings:\t%d\n",
26361 (int) current_tune->disparage_flag_setting_t16_encodings);
26362 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26363 "string_ops_prefer_neon:\t%d\n",
26364 (int) current_tune->string_ops_prefer_neon);
26365 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26366 "max_insns_inline_memset:\t%d\n",
26367 current_tune->max_insns_inline_memset);
26368 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26369 current_tune->fusible_ops);
26370 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26371 (int) current_tune->sched_autopref);
26372 }
26373
26374 /* Print .arch and .arch_extension directives corresponding to the
26375 current architecture configuration. */
26376 static void
26377 arm_print_asm_arch_directives ()
26378 {
26379 const arch_option *arch
26380 = arm_parse_arch_option_name (all_architectures, "-march",
26381 arm_active_target.arch_name);
26382 auto_sbitmap opt_bits (isa_num_bits);
26383
26384 gcc_assert (arch);
26385
26386 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26387 arm_last_printed_arch_string = arm_active_target.arch_name;
26388 if (!arch->common.extensions)
26389 return;
26390
26391 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26392 opt->name != NULL;
26393 opt++)
26394 {
26395 if (!opt->remove)
26396 {
26397 arm_initialize_isa (opt_bits, opt->isa_bits);
26398
26399 /* If every feature bit of this option is set in the target
26400 ISA specification, print out the option name. However,
26401 don't print anything if all the bits are part of the
26402 FPU specification. */
26403 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26404 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26405 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26406 }
26407 }
26408 }
26409
26410 static void
26411 arm_file_start (void)
26412 {
26413 int val;
26414
26415 if (TARGET_BPABI)
26416 {
26417 /* We don't have a specified CPU. Use the architecture to
26418 generate the tags.
26419
26420 Note: it might be better to do this unconditionally, then the
26421 assembler would not need to know about all new CPU names as
26422 they are added. */
26423 if (!arm_active_target.core_name)
26424 {
26425 /* armv7ve doesn't support any extensions. */
26426 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26427 {
26428 /* Keep backward compatibility for assemblers
26429 which don't support armv7ve. */
26430 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26431 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26432 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26433 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26434 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26435 arm_last_printed_arch_string = "armv7ve";
26436 }
26437 else
26438 arm_print_asm_arch_directives ();
26439 }
26440 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26441 {
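/* Core names of the form "generic-<arch>" map straight onto an .arch
   directive; the + 8 below skips the "generic-" prefix.  */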
26442 asm_fprintf (asm_out_file, "\t.arch %s\n",
26443 arm_active_target.core_name + 8);
26444 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26445 }
26446 else
26447 {
26448 const char* truncated_name
26449 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26450 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26451 }
26452
26453 if (print_tune_info)
26454 arm_print_tune_info ();
26455
26456 if (! TARGET_SOFT_FLOAT)
26457 {
26458 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26459 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26460
26461 if (TARGET_HARD_FLOAT_ABI)
26462 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26463 }
26464
26465 /* Some of these attributes only apply when the corresponding features
26466 are used. However we don't have any easy way of figuring this out.
26467 Conservatively record the setting that would have been used. */
26468
26469 if (flag_rounding_math)
26470 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26471
26472 if (!flag_unsafe_math_optimizations)
26473 {
26474 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26475 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26476 }
26477 if (flag_signaling_nans)
26478 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26479
26480 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26481 flag_finite_math_only ? 1 : 3);
26482
26483 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26484 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
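/* Tag_ABI_enum_size: 1 means enums use the smallest container that fits,
   2 means they are int-sized.  */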
26485 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26486 flag_short_enums ? 1 : 2);
26487
26488 /* Tag_ABI_optimization_goals. */
26489 if (optimize_size)
26490 val = 4;
26491 else if (optimize >= 2)
26492 val = 2;
26493 else if (optimize)
26494 val = 1;
26495 else
26496 val = 6;
26497 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26498
26499 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26500 unaligned_access);
26501
26502 if (arm_fp16_format)
26503 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26504 (int) arm_fp16_format);
26505
26506 if (arm_lang_output_object_attributes_hook)
26507 arm_lang_output_object_attributes_hook ();
26508 }
26509
26510 default_file_start ();
26511 }
26512
26513 static void
26514 arm_file_end (void)
26515 {
26516 int regno;
26517
26518 if (NEED_INDICATE_EXEC_STACK)
26519 /* Add .note.GNU-stack. */
26520 file_end_indicate_exec_stack ();
26521
26522 if (! thumb_call_reg_needed)
26523 return;
26524
26525 switch_to_section (text_section);
26526 asm_fprintf (asm_out_file, "\t.code 16\n");
26527 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26528
26529 for (regno = 0; regno < LR_REGNUM; regno++)
26530 {
26531 rtx label = thumb_call_via_label[regno];
26532
26533 if (label != 0)
26534 {
26535 targetm.asm_out.internal_label (asm_out_file, "L",
26536 CODE_LABEL_NUMBER (label));
26537 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26538 }
26539 }
26540 }
26541
26542 #ifndef ARM_PE
26543 /* Symbols in the text segment can be accessed without indirecting via the
26544 constant pool; it may take an extra binary operation, but this is still
26545 faster than indirecting via memory. Don't do this when not optimizing,
26546 since we won't be calculating all of the offsets necessary to do this
26547 simplification. */
26548
26549 static void
26550 arm_encode_section_info (tree decl, rtx rtl, int first)
26551 {
26552 if (optimize > 0 && TREE_CONSTANT (decl))
26553 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26554
26555 default_encode_section_info (decl, rtl, first);
26556 }
26557 #endif /* !ARM_PE */
26558
26559 static void
26560 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26561 {
26562 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26563 && !strcmp (prefix, "L"))
26564 {
26565 arm_ccfsm_state = 0;
26566 arm_target_insn = NULL;
26567 }
26568 default_internal_label (stream, prefix, labelno);
26569 }
26570
26571 /* Output code to add DELTA to the first argument, and then jump
26572 to FUNCTION. Used for C++ multiple inheritance. */
26573
26574 static void
26575 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26576 HOST_WIDE_INT, tree function)
26577 {
26578 static int thunk_label = 0;
26579 char label[256];
26580 char labelpc[256];
26581 int mi_delta = delta;
26582 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26583 int shift = 0;
26584 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26585 ? 1 : 0);
26586 if (mi_delta < 0)
26587 mi_delta = - mi_delta;
26588
26589 final_start_function (emit_barrier (), file, 1);
26590
26591 if (TARGET_THUMB1)
26592 {
26593 int labelno = thunk_label++;
26594 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26595 /* Thunks are entered in arm mode when available. */
26596 if (TARGET_THUMB1_ONLY)
26597 {
26598 /* push r3 so we can use it as a temporary. */
26599 /* TODO: Omit this save if r3 is not used. */
26600 fputs ("\tpush {r3}\n", file);
26601 fputs ("\tldr\tr3, ", file);
26602 }
26603 else
26604 {
26605 fputs ("\tldr\tr12, ", file);
26606 }
26607 assemble_name (file, label);
26608 fputc ('\n', file);
26609 if (flag_pic)
26610 {
26611 /* If we are generating PIC, the ldr instruction below loads
26612 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26613 the address of the add + 8, so we have:
26614
26615 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26616 = target + 1.
26617
26618 Note that we have "+ 1" because some versions of GNU ld
26619 don't set the low bit of the result for R_ARM_REL32
26620 relocations against thumb function symbols.
26621 On ARMv6M this is +4, not +8. */
26622 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26623 assemble_name (file, labelpc);
26624 fputs (":\n", file);
26625 if (TARGET_THUMB1_ONLY)
26626 {
26627 /* This is 2 insns after the start of the thunk, so we know it
26628 is 4-byte aligned. */
26629 fputs ("\tadd\tr3, pc, r3\n", file);
26630 fputs ("\tmov r12, r3\n", file);
26631 }
26632 else
26633 fputs ("\tadd\tr12, pc, r12\n", file);
26634 }
26635 else if (TARGET_THUMB1_ONLY)
26636 fputs ("\tmov r12, r3\n", file);
26637 }
26638 if (TARGET_THUMB1_ONLY)
26639 {
26640 if (mi_delta > 255)
26641 {
26642 fputs ("\tldr\tr3, ", file);
26643 assemble_name (file, label);
26644 fputs ("+4\n", file);
26645 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26646 mi_op, this_regno, this_regno);
26647 }
26648 else if (mi_delta != 0)
26649 {
26650 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26651 when one of the operands is an immediate. */
26652 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26653 mi_op, this_regno, this_regno,
26654 mi_delta);
26655 }
26656 }
26657 else
26658 {
26659 /* TODO: Use movw/movt for large constants when available. */
26660 while (mi_delta != 0)
26661 {
26662 if ((mi_delta & (3 << shift)) == 0)
26663 shift += 2;
26664 else
26665 {
26666 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26667 mi_op, this_regno, this_regno,
26668 mi_delta & (0xff << shift));
26669 mi_delta &= ~(0xff << shift);
26670 shift += 8;
26671 }
26672 }
26673 }
26674 if (TARGET_THUMB1)
26675 {
26676 if (TARGET_THUMB1_ONLY)
26677 fputs ("\tpop\t{r3}\n", file);
26678
26679 fprintf (file, "\tbx\tr12\n");
26680 ASM_OUTPUT_ALIGN (file, 2);
26681 assemble_name (file, label);
26682 fputs (":\n", file);
26683 if (flag_pic)
26684 {
26685 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26686 rtx tem = XEXP (DECL_RTL (function), 0);
26687 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26688 pipeline offset is four rather than eight. Adjust the offset
26689 accordingly. */
26690 tem = plus_constant (GET_MODE (tem), tem,
26691 TARGET_THUMB1_ONLY ? -3 : -7);
26692 tem = gen_rtx_MINUS (GET_MODE (tem),
26693 tem,
26694 gen_rtx_SYMBOL_REF (Pmode,
26695 ggc_strdup (labelpc)));
26696 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26697 }
26698 else
26699 /* Output ".word .LTHUNKn". */
26700 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26701
26702 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26703 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26704 }
26705 else
26706 {
26707 fputs ("\tb\t", file);
26708 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26709 if (NEED_PLT_RELOC)
26710 fputs ("(PLT)", file);
26711 fputc ('\n', file);
26712 }
26713
26714 final_end_function ();
26715 }
26716
26717 /* MI thunk handling for TARGET_32BIT. */
26718
26719 static void
26720 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26721 HOST_WIDE_INT vcall_offset, tree function)
26722 {
26723 /* On ARM, this_regno is R0 or R1 depending on
26724 whether the function returns an aggregate or not.
26725 */
26726 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26727 function)
26728 ? R1_REGNUM : R0_REGNUM);
26729
26730 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26731 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26732 reload_completed = 1;
26733 emit_note (NOTE_INSN_PROLOGUE_END);
26734
26735 /* Add DELTA to THIS_RTX. */
26736 if (delta != 0)
26737 arm_split_constant (PLUS, Pmode, NULL_RTX,
26738 delta, this_rtx, this_rtx, false);
26739
26740 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26741 if (vcall_offset != 0)
26742 {
26743 /* Load *THIS_RTX. */
26744 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26745 /* Compute *THIS_RTX + VCALL_OFFSET. */
26746 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26747 false);
26748 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26749 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26750 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26751 }
26752
26753 /* Generate a tail call to the target function. */
26754 if (!TREE_USED (function))
26755 {
26756 assemble_external (function);
26757 TREE_USED (function) = 1;
26758 }
26759 rtx funexp = XEXP (DECL_RTL (function), 0);
26760 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26761 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26762 SIBLING_CALL_P (insn) = 1;
26763
26764 insn = get_insns ();
26765 shorten_branches (insn);
26766 final_start_function (insn, file, 1);
26767 final (insn, file, 1);
26768 final_end_function ();
26769
26770 /* Stop pretending this is a post-reload pass. */
26771 reload_completed = 0;
26772 }
26773
26774 /* Output code to add DELTA to the first argument, and then jump
26775 to FUNCTION. Used for C++ multiple inheritance. */
26776
26777 static void
26778 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26779 HOST_WIDE_INT vcall_offset, tree function)
26780 {
26781 if (TARGET_32BIT)
26782 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26783 else
26784 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26785 }
26786
26787 int
26788 arm_emit_vector_const (FILE *file, rtx x)
26789 {
26790 int i;
26791 const char * pattern;
26792
26793 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26794
26795 switch (GET_MODE (x))
26796 {
26797 case E_V2SImode: pattern = "%08x"; break;
26798 case E_V4HImode: pattern = "%04x"; break;
26799 case E_V8QImode: pattern = "%02x"; break;
26800 default: gcc_unreachable ();
26801 }
26802
26803 fprintf (file, "0x");
26804 for (i = CONST_VECTOR_NUNITS (x); i--;)
26805 {
26806 rtx element;
26807
26808 element = CONST_VECTOR_ELT (x, i);
26809 fprintf (file, pattern, INTVAL (element));
26810 }
26811
26812 return 1;
26813 }
26814
26815 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26816 HFmode constant pool entries are actually loaded with ldr. */
26817 void
26818 arm_emit_fp16_const (rtx c)
26819 {
26820 long bits;
26821
26822 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26823 if (WORDS_BIG_ENDIAN)
26824 assemble_zeros (2);
26825 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26826 if (!WORDS_BIG_ENDIAN)
26827 assemble_zeros (2);
26828 }
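
/* Illustrative note (not part of the original source): the two-byte zero
   padding above is placed so that an ldr of the whole 4-byte word leaves
   the fp16 bit pattern in the low halfword of the loaded register on both
   endiannesses.  E.g. for the constant 1.0 (bit pattern 0x3c00) the bytes
   emitted are 00 3c 00 00 on little-endian and 00 00 3c 00 on big-endian,
   and the loaded word is 0x00003c00 either way.  */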
26829
26830 const char *
26831 arm_output_load_gr (rtx *operands)
26832 {
26833 rtx reg;
26834 rtx offset;
26835 rtx wcgr;
26836 rtx sum;
26837
26838 if (!MEM_P (operands [1])
26839 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26840 || !REG_P (reg = XEXP (sum, 0))
26841 || !CONST_INT_P (offset = XEXP (sum, 1))
26842 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26843 return "wldrw%?\t%0, %1";
26844
26845 /* Fix up an out-of-range load of a GR register. */
26846 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26847 wcgr = operands[0];
26848 operands[0] = reg;
26849 output_asm_insn ("ldr%?\t%0, %1", operands);
26850
26851 operands[0] = wcgr;
26852 operands[1] = reg;
26853 output_asm_insn ("tmcr%?\t%0, %1", operands);
26854 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26855
26856 return "";
26857 }
26858
26859 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26860
26861 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26862 named arg and all anonymous args onto the stack.
26863 XXX I know the prologue shouldn't be pushing registers, but it is faster
26864 that way. */
26865
26866 static void
26867 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26868 machine_mode mode,
26869 tree type,
26870 int *pretend_size,
26871 int second_time ATTRIBUTE_UNUSED)
26872 {
26873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26874 int nregs;
26875
26876 cfun->machine->uses_anonymous_args = 1;
26877 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26878 {
26879 nregs = pcum->aapcs_ncrn;
26880 if (nregs & 1)
26881 {
26882 int res = arm_needs_doubleword_align (mode, type);
26883 if (res < 0 && warn_psabi)
26884 inform (input_location, "parameter passing for argument of "
26885 "type %qT changed in GCC 7.1", type);
26886 else if (res > 0)
26887 nregs++;
26888 }
26889 }
26890 else
26891 nregs = pcum->nregs;
26892
26893 if (nregs < NUM_ARG_REGS)
26894 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26895 }
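
/* Worked example (not part of the original source): for a variadic
   function such as  int f (int a, ...)  under AAPCS, one core register
   (r0) is consumed by the named argument, so nregs is 1 and *pretend_size
   becomes (4 - 1) * 4 == 12 bytes; the prologue then pushes r1-r3 so the
   anonymous arguments sit contiguously with any stack-passed ones.  */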
26896
26897 /* We can't rely on the caller doing the proper promotion when
26898 using APCS or ATPCS. */
26899
26900 static bool
26901 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26902 {
26903 return !TARGET_AAPCS_BASED;
26904 }
26905
26906 static machine_mode
26907 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26908 machine_mode mode,
26909 int *punsignedp ATTRIBUTE_UNUSED,
26910 const_tree fntype ATTRIBUTE_UNUSED,
26911 int for_return ATTRIBUTE_UNUSED)
26912 {
26913 if (GET_MODE_CLASS (mode) == MODE_INT
26914 && GET_MODE_SIZE (mode) < 4)
26915 return SImode;
26916
26917 return mode;
26918 }
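
/* For example, a QImode or HImode (char or short) argument or return
   value is widened to SImode here, so it travels in a full 32-bit
   register.  */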
26919
26920
26921 static bool
26922 arm_default_short_enums (void)
26923 {
26924 return ARM_DEFAULT_SHORT_ENUMS;
26925 }
26926
26927
26928 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26929
26930 static bool
26931 arm_align_anon_bitfield (void)
26932 {
26933 return TARGET_AAPCS_BASED;
26934 }
26935
26936
26937 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26938
26939 static tree
26940 arm_cxx_guard_type (void)
26941 {
26942 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26943 }
26944
26945
26946 /* The EABI says test the least significant bit of a guard variable. */
26947
26948 static bool
26949 arm_cxx_guard_mask_bit (void)
26950 {
26951 return TARGET_AAPCS_BASED;
26952 }
26953
26954
26955 /* The EABI specifies that all array cookies are 8 bytes long. */
26956
26957 static tree
26958 arm_get_cookie_size (tree type)
26959 {
26960 tree size;
26961
26962 if (!TARGET_AAPCS_BASED)
26963 return default_cxx_get_cookie_size (type);
26964
26965 size = build_int_cst (sizetype, 8);
26966 return size;
26967 }
26968
26969
26970 /* The EABI says that array cookies should also contain the element size. */
26971
26972 static bool
26973 arm_cookie_has_size (void)
26974 {
26975 return TARGET_AAPCS_BASED;
26976 }
26977
26978
26979 /* The EABI says constructors and destructors should return a pointer to
26980 the object constructed/destroyed. */
26981
26982 static bool
26983 arm_cxx_cdtor_returns_this (void)
26984 {
26985 return TARGET_AAPCS_BASED;
26986 }
26987
26988 /* The EABI says that an inline function may never be the key
26989 method. */
26990
26991 static bool
26992 arm_cxx_key_method_may_be_inline (void)
26993 {
26994 return !TARGET_AAPCS_BASED;
26995 }
26996
26997 static void
26998 arm_cxx_determine_class_data_visibility (tree decl)
26999 {
27000 if (!TARGET_AAPCS_BASED
27001 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27002 return;
27003
27004 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27005 is exported. However, on systems without dynamic vague linkage,
27006 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27007 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27008 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27009 else
27010 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27011 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27012 }
27013
27014 static bool
27015 arm_cxx_class_data_always_comdat (void)
27016 {
27017 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27018 vague linkage if the class has no key function. */
27019 return !TARGET_AAPCS_BASED;
27020 }
27021
27022
27023 /* The EABI says __aeabi_atexit should be used to register static
27024 destructors. */
27025
27026 static bool
27027 arm_cxx_use_aeabi_atexit (void)
27028 {
27029 return TARGET_AAPCS_BASED;
27030 }
27031
27032
27033 void
27034 arm_set_return_address (rtx source, rtx scratch)
27035 {
27036 arm_stack_offsets *offsets;
27037 HOST_WIDE_INT delta;
27038 rtx addr, mem;
27039 unsigned long saved_regs;
27040
27041 offsets = arm_get_frame_offsets ();
27042 saved_regs = offsets->saved_regs_mask;
27043
27044 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27045 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27046 else
27047 {
27048 if (frame_pointer_needed)
27049 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27050 else
27051 {
27052 /* LR will be the first saved register. */
27053 delta = offsets->outgoing_args - (offsets->frame + 4);
27054
27055
27056 if (delta >= 4096)
27057 {
27058 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27059 GEN_INT (delta & ~4095)));
27060 addr = scratch;
27061 delta &= 4095;
27062 }
27063 else
27064 addr = stack_pointer_rtx;
27065
27066 addr = plus_constant (Pmode, addr, delta);
27067 }
27068
27069 /* The store needs to be marked to prevent DSE from deleting
27070 it as dead if it is based on fp. */
27071 mem = gen_frame_mem (Pmode, addr);
27072 MEM_VOLATILE_P (mem) = true;
27073 emit_move_insn (mem, source);
27074 }
27075 }
27076
27077
27078 void
27079 thumb_set_return_address (rtx source, rtx scratch)
27080 {
27081 arm_stack_offsets *offsets;
27082 HOST_WIDE_INT delta;
27083 HOST_WIDE_INT limit;
27084 int reg;
27085 rtx addr, mem;
27086 unsigned long mask;
27087
27088 emit_use (source);
27089
27090 offsets = arm_get_frame_offsets ();
27091 mask = offsets->saved_regs_mask;
27092 if (mask & (1 << LR_REGNUM))
27093 {
27094 limit = 1024;
27095 /* Find the saved regs. */
27096 if (frame_pointer_needed)
27097 {
27098 delta = offsets->soft_frame - offsets->saved_args;
27099 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27100 if (TARGET_THUMB1)
27101 limit = 128;
27102 }
27103 else
27104 {
27105 delta = offsets->outgoing_args - offsets->saved_args;
27106 reg = SP_REGNUM;
27107 }
27108 /* Allow for the stack frame. */
27109 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27110 delta -= 16;
27111 /* The link register is always the first saved register. */
27112 delta -= 4;
27113
27114 /* Construct the address. */
27115 addr = gen_rtx_REG (SImode, reg);
27116 if (delta > limit)
27117 {
27118 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27119 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27120 addr = scratch;
27121 }
27122 else
27123 addr = plus_constant (Pmode, addr, delta);
27124
27125 /* The store needs to be marked to prevent DSE from deleting
27126 it as dead if it is based on fp. */
27127 mem = gen_frame_mem (Pmode, addr);
27128 MEM_VOLATILE_P (mem) = true;
27129 emit_move_insn (mem, source);
27130 }
27131 else
27132 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27133 }
27134
27135 /* Implements target hook vector_mode_supported_p. */
27136 bool
27137 arm_vector_mode_supported_p (machine_mode mode)
27138 {
27139 /* Neon also supports V2SImode, etc. listed in the clause below. */
27140 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27141 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27142 || mode == V2DImode || mode == V8HFmode))
27143 return true;
27144
27145 if ((TARGET_NEON || TARGET_IWMMXT)
27146 && ((mode == V2SImode)
27147 || (mode == V4HImode)
27148 || (mode == V8QImode)))
27149 return true;
27150
27151 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27152 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27153 || mode == V2HAmode))
27154 return true;
27155
27156 return false;
27157 }
27158
27159 /* Implements target hook array_mode_supported_p. */
27160
27161 static bool
27162 arm_array_mode_supported_p (machine_mode mode,
27163 unsigned HOST_WIDE_INT nelems)
27164 {
27165 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27166 for now, as the lane-swapping logic needs to be extended in the expanders.
27167 See PR target/82518. */
27168 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27169 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27170 && (nelems >= 2 && nelems <= 4))
27171 return true;
27172
27173 return false;
27174 }
27175
27176 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27177 registers when autovectorizing for Neon, at least until multiple vector
27178 widths are supported properly by the middle-end. */
27179
27180 static machine_mode
27181 arm_preferred_simd_mode (scalar_mode mode)
27182 {
27183 if (TARGET_NEON)
27184 switch (mode)
27185 {
27186 case E_SFmode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27188 case E_SImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27190 case E_HImode:
27191 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27192 case E_QImode:
27193 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27194 case E_DImode:
27195 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27196 return V2DImode;
27197 break;
27198
27199 default:;
27200 }
27201
27202 if (TARGET_REALLY_IWMMXT)
27203 switch (mode)
27204 {
27205 case E_SImode:
27206 return V2SImode;
27207 case E_HImode:
27208 return V4HImode;
27209 case E_QImode:
27210 return V8QImode;
27211
27212 default:;
27213 }
27214
27215 return word_mode;
27216 }
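
/* For example, with Neon enabled SImode data is autovectorized as
   V4SImode (a 128-bit q register) by default, or as V2SImode (a 64-bit
   d register) when -mvectorize-with-neon-double is given.  */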
27217
27218 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27219
27220 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27221 using r0-r4 for function arguments, r7 for the stack frame and not have
27222 enough left over to do doubleword arithmetic. For Thumb-2 all the
27223 potentially problematic instructions accept high registers so this is not
27224 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27225 that require many low registers. */
27226 static bool
27227 arm_class_likely_spilled_p (reg_class_t rclass)
27228 {
27229 if ((TARGET_THUMB1 && rclass == LO_REGS)
27230 || rclass == CC_REG)
27231 return true;
27232
27233 return false;
27234 }
27235
27236 /* Implements target hook small_register_classes_for_mode_p. */
27237 bool
27238 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27239 {
27240 return TARGET_THUMB1;
27241 }
27242
27243 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27244 ARM insns and therefore guarantee that the shift count is modulo 256.
27245 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27246 guarantee no particular behavior for out-of-range counts. */
27247
27248 static unsigned HOST_WIDE_INT
27249 arm_shift_truncation_mask (machine_mode mode)
27250 {
27251 return mode == SImode ? 255 : 0;
27252 }
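
/* Illustrative sketch (not part of the original source): a mask of 255
   for SImode lets the compiler drop an explicit masking of a variable
   shift count, because ARM register-specified shifts already use only
   the bottom byte of the count register.  For instance, in

     unsigned int f (unsigned int x, unsigned int n)
     {
       return x << (n & 255);
     }

   the `& 255' can be optimized away.  For DImode the mask is 0, so no
   such assumption is made about 64-bit shifts, which may be implemented
   by libcalls or multi-insn sequences.  */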
27253
27254
27255 /* Map internal gcc register numbers to DWARF2 register numbers. */
27256
27257 unsigned int
27258 arm_dbx_register_number (unsigned int regno)
27259 {
27260 if (regno < 16)
27261 return regno;
27262
27263 if (IS_VFP_REGNUM (regno))
27264 {
27265 /* See comment in arm_dwarf_register_span. */
27266 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27267 return 64 + regno - FIRST_VFP_REGNUM;
27268 else
27269 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27270 }
27271
27272 if (IS_IWMMXT_GR_REGNUM (regno))
27273 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27274
27275 if (IS_IWMMXT_REGNUM (regno))
27276 return 112 + regno - FIRST_IWMMXT_REGNUM;
27277
27278 return DWARF_FRAME_REGISTERS;
27279 }
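
/* Worked examples (not part of the original source), using the usual
   arm.h register numbering: core registers map to themselves (r0-r15 ->
   0-15); s5 maps to 64 + 5 = 69 in the legacy VFP range; a double-only
   register such as d20 (internal regno FIRST_VFP_REGNUM + 40) maps to
   256 + 20 = 276; iWMMXt wCGR and wR registers start at 104 and 112.  */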
27280
27281 /* DWARF models VFPv3 registers as 32 64-bit registers.
27282 GCC models them as 64 32-bit registers, so we need to describe this to
27283 the DWARF generation code. Other registers can use the default. */
27284 static rtx
27285 arm_dwarf_register_span (rtx rtl)
27286 {
27287 machine_mode mode;
27288 unsigned regno;
27289 rtx parts[16];
27290 int nregs;
27291 int i;
27292
27293 regno = REGNO (rtl);
27294 if (!IS_VFP_REGNUM (regno))
27295 return NULL_RTX;
27296
27297 /* XXX FIXME: The EABI defines two VFP register ranges:
27298 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27299 256-287: D0-D31
27300 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27301 corresponding D register. Until GDB supports this, we shall use the
27302 legacy encodings. We also use these encodings for D0-D15 for
27303 compatibility with older debuggers. */
27304 mode = GET_MODE (rtl);
27305 if (GET_MODE_SIZE (mode) < 8)
27306 return NULL_RTX;
27307
27308 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27309 {
27310 nregs = GET_MODE_SIZE (mode) / 4;
27311 for (i = 0; i < nregs; i += 2)
27312 if (TARGET_BIG_END)
27313 {
27314 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27315 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27316 }
27317 else
27318 {
27319 parts[i] = gen_rtx_REG (SImode, regno + i);
27320 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27321 }
27322 }
27323 else
27324 {
27325 nregs = GET_MODE_SIZE (mode) / 8;
27326 for (i = 0; i < nregs; i++)
27327 parts[i] = gen_rtx_REG (DImode, regno + i);
27328 }
27329
27330 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27331 }
27332
27333 #if ARM_UNWIND_INFO
27334 /* Emit unwind directives for a store-multiple instruction or stack pointer
27335 push during alignment.
27336 These should only ever be generated by the function prologue code, so
27337 expect them to have a particular form.
27338 The store-multiple instruction sometimes pushes pc as the last register,
27339 although it should not be tracked in the unwind information, or for -Os
27340 sometimes pushes some dummy registers before the first register that needs
27341 to be tracked in the unwind information; such dummy registers exist only
27342 to avoid a separate stack adjustment, and will not be restored in the
27343 epilogue. */
27344
27345 static void
27346 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27347 {
27348 int i;
27349 HOST_WIDE_INT offset;
27350 HOST_WIDE_INT nregs;
27351 int reg_size;
27352 unsigned reg;
27353 unsigned lastreg;
27354 unsigned padfirst = 0, padlast = 0;
27355 rtx e;
27356
27357 e = XVECEXP (p, 0, 0);
27358 gcc_assert (GET_CODE (e) == SET);
27359
27360 /* First insn will adjust the stack pointer. */
27361 gcc_assert (GET_CODE (e) == SET
27362 && REG_P (SET_DEST (e))
27363 && REGNO (SET_DEST (e)) == SP_REGNUM
27364 && GET_CODE (SET_SRC (e)) == PLUS);
27365
27366 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27367 nregs = XVECLEN (p, 0) - 1;
27368 gcc_assert (nregs);
27369
27370 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27371 if (reg < 16)
27372 {
27373 /* For -Os dummy registers can be pushed at the beginning to
27374 avoid separate stack pointer adjustment. */
27375 e = XVECEXP (p, 0, 1);
27376 e = XEXP (SET_DEST (e), 0);
27377 if (GET_CODE (e) == PLUS)
27378 padfirst = INTVAL (XEXP (e, 1));
27379 gcc_assert (padfirst == 0 || optimize_size);
27380 /* The function prologue may also push pc, but not annotate it as it is
27381 never restored. We turn this into a stack pointer adjustment. */
27382 e = XVECEXP (p, 0, nregs);
27383 e = XEXP (SET_DEST (e), 0);
27384 if (GET_CODE (e) == PLUS)
27385 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27386 else
27387 padlast = offset - 4;
27388 gcc_assert (padlast == 0 || padlast == 4);
27389 if (padlast == 4)
27390 fprintf (asm_out_file, "\t.pad #4\n");
27391 reg_size = 4;
27392 fprintf (asm_out_file, "\t.save {");
27393 }
27394 else if (IS_VFP_REGNUM (reg))
27395 {
27396 reg_size = 8;
27397 fprintf (asm_out_file, "\t.vsave {");
27398 }
27399 else
27400 /* Unknown register type. */
27401 gcc_unreachable ();
27402
27403 /* If the stack increment doesn't match the size of the saved registers,
27404 something has gone horribly wrong. */
27405 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27406
27407 offset = padfirst;
27408 lastreg = 0;
27409 /* The remaining insns will describe the stores. */
27410 for (i = 1; i <= nregs; i++)
27411 {
27412 /* Expect (set (mem <addr>) (reg)).
27413 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27414 e = XVECEXP (p, 0, i);
27415 gcc_assert (GET_CODE (e) == SET
27416 && MEM_P (SET_DEST (e))
27417 && REG_P (SET_SRC (e)));
27418
27419 reg = REGNO (SET_SRC (e));
27420 gcc_assert (reg >= lastreg);
27421
27422 if (i != 1)
27423 fprintf (asm_out_file, ", ");
27424 /* We can't use %r for vfp because we need to use the
27425 double precision register names. */
27426 if (IS_VFP_REGNUM (reg))
27427 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27428 else
27429 asm_fprintf (asm_out_file, "%r", reg);
27430
27431 if (flag_checking)
27432 {
27433 /* Check that the addresses are consecutive. */
27434 e = XEXP (SET_DEST (e), 0);
27435 if (GET_CODE (e) == PLUS)
27436 gcc_assert (REG_P (XEXP (e, 0))
27437 && REGNO (XEXP (e, 0)) == SP_REGNUM
27438 && CONST_INT_P (XEXP (e, 1))
27439 && offset == INTVAL (XEXP (e, 1)));
27440 else
27441 gcc_assert (i == 1
27442 && REG_P (e)
27443 && REGNO (e) == SP_REGNUM);
27444 offset += reg_size;
27445 }
27446 }
27447 fprintf (asm_out_file, "}\n");
27448 if (padfirst)
27449 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27450 }
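
/* Illustrative example (not part of the original source): for a prologue
   store-multiple such as

     push {r4, r5, lr}

   this routine emits

     .save {r4, r5, lr}

   and, when the instruction also pushed pc or leading dummy registers to
   fold a stack adjustment into the push, an additional

     .pad #<bytes>

   before or after the .save as appropriate.  */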
27451
27452 /* Emit unwind directives for a SET. */
27453
27454 static void
27455 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27456 {
27457 rtx e0;
27458 rtx e1;
27459 unsigned reg;
27460
27461 e0 = XEXP (p, 0);
27462 e1 = XEXP (p, 1);
27463 switch (GET_CODE (e0))
27464 {
27465 case MEM:
27466 /* Pushing a single register. */
27467 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27468 || !REG_P (XEXP (XEXP (e0, 0), 0))
27469 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27470 abort ();
27471
27472 asm_fprintf (asm_out_file, "\t.save ");
27473 if (IS_VFP_REGNUM (REGNO (e1)))
27474 asm_fprintf(asm_out_file, "{d%d}\n",
27475 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27476 else
27477 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27478 break;
27479
27480 case REG:
27481 if (REGNO (e0) == SP_REGNUM)
27482 {
27483 /* A stack increment. */
27484 if (GET_CODE (e1) != PLUS
27485 || !REG_P (XEXP (e1, 0))
27486 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27487 || !CONST_INT_P (XEXP (e1, 1)))
27488 abort ();
27489
27490 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27491 -INTVAL (XEXP (e1, 1)));
27492 }
27493 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27494 {
27495 HOST_WIDE_INT offset;
27496
27497 if (GET_CODE (e1) == PLUS)
27498 {
27499 if (!REG_P (XEXP (e1, 0))
27500 || !CONST_INT_P (XEXP (e1, 1)))
27501 abort ();
27502 reg = REGNO (XEXP (e1, 0));
27503 offset = INTVAL (XEXP (e1, 1));
27504 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27505 HARD_FRAME_POINTER_REGNUM, reg,
27506 offset);
27507 }
27508 else if (REG_P (e1))
27509 {
27510 reg = REGNO (e1);
27511 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27512 HARD_FRAME_POINTER_REGNUM, reg);
27513 }
27514 else
27515 abort ();
27516 }
27517 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27518 {
27519 /* Move from sp to reg. */
27520 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27521 }
27522 else if (GET_CODE (e1) == PLUS
27523 && REG_P (XEXP (e1, 0))
27524 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27525 && CONST_INT_P (XEXP (e1, 1)))
27526 {
27527 /* Set reg to offset from sp. */
27528 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27529 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27530 }
27531 else
27532 abort ();
27533 break;
27534
27535 default:
27536 abort ();
27537 }
27538 }
27539
27540
27541 /* Emit unwind directives for the given insn. */
27542
27543 static void
27544 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27545 {
27546 rtx note, pat;
27547 bool handled_one = false;
27548
27549 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27550 return;
27551
27552 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27553 && (TREE_NOTHROW (current_function_decl)
27554 || crtl->all_throwers_are_sibcalls))
27555 return;
27556
27557 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27558 return;
27559
27560 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27561 {
27562 switch (REG_NOTE_KIND (note))
27563 {
27564 case REG_FRAME_RELATED_EXPR:
27565 pat = XEXP (note, 0);
27566 goto found;
27567
27568 case REG_CFA_REGISTER:
27569 pat = XEXP (note, 0);
27570 if (pat == NULL)
27571 {
27572 pat = PATTERN (insn);
27573 if (GET_CODE (pat) == PARALLEL)
27574 pat = XVECEXP (pat, 0, 0);
27575 }
27576
27577 /* Only emitted for IS_STACKALIGN re-alignment. */
27578 {
27579 rtx dest, src;
27580 unsigned reg;
27581
27582 src = SET_SRC (pat);
27583 dest = SET_DEST (pat);
27584
27585 gcc_assert (src == stack_pointer_rtx);
27586 reg = REGNO (dest);
27587 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27588 reg + 0x90, reg);
27589 }
27590 handled_one = true;
27591 break;
27592
27593 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27594 to get correct dwarf information for shrink-wrapping. We should not
27595 emit unwind information for it because these notes are used either for
27596 pretend arguments or to adjust sp and restore registers from the
27597 stack. */
27598 case REG_CFA_DEF_CFA:
27599 case REG_CFA_ADJUST_CFA:
27600 case REG_CFA_RESTORE:
27601 return;
27602
27603 case REG_CFA_EXPRESSION:
27604 case REG_CFA_OFFSET:
27605 /* ??? Only handling here what we actually emit. */
27606 gcc_unreachable ();
27607
27608 default:
27609 break;
27610 }
27611 }
27612 if (handled_one)
27613 return;
27614 pat = PATTERN (insn);
27615 found:
27616
27617 switch (GET_CODE (pat))
27618 {
27619 case SET:
27620 arm_unwind_emit_set (asm_out_file, pat);
27621 break;
27622
27623 case SEQUENCE:
27624 /* Store multiple. */
27625 arm_unwind_emit_sequence (asm_out_file, pat);
27626 break;
27627
27628 default:
27629 abort();
27630 }
27631 }
27632
27633
27634 /* Output a reference from a function exception table to the type_info
27635 object X. The EABI specifies that the symbol should be relocated by
27636 an R_ARM_TARGET2 relocation. */
27637
27638 static bool
27639 arm_output_ttype (rtx x)
27640 {
27641 fputs ("\t.word\t", asm_out_file);
27642 output_addr_const (asm_out_file, x);
27643 /* Use special relocations for symbol references. */
27644 if (!CONST_INT_P (x))
27645 fputs ("(TARGET2)", asm_out_file);
27646 fputc ('\n', asm_out_file);
27647
27648 return TRUE;
27649 }
27650
27651 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27652
27653 static void
27654 arm_asm_emit_except_personality (rtx personality)
27655 {
27656 fputs ("\t.personality\t", asm_out_file);
27657 output_addr_const (asm_out_file, personality);
27658 fputc ('\n', asm_out_file);
27659 }
27660 #endif /* ARM_UNWIND_INFO */
27661
27662 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27663
27664 static void
27665 arm_asm_init_sections (void)
27666 {
27667 #if ARM_UNWIND_INFO
27668 exception_section = get_unnamed_section (0, output_section_asm_op,
27669 "\t.handlerdata");
27670 #endif /* ARM_UNWIND_INFO */
27671
27672 #ifdef OBJECT_FORMAT_ELF
27673 if (target_pure_code)
27674 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27675 #endif
27676 }
27677
27678 /* Output unwind directives for the start/end of a function. */
27679
27680 void
27681 arm_output_fn_unwind (FILE * f, bool prologue)
27682 {
27683 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27684 return;
27685
27686 if (prologue)
27687 fputs ("\t.fnstart\n", f);
27688 else
27689 {
27690 /* If this function will never be unwound, then mark it as such.
27691 The same condition is used in arm_unwind_emit to suppress
27692 the frame annotations. */
27693 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27694 && (TREE_NOTHROW (current_function_decl)
27695 || crtl->all_throwers_are_sibcalls))
27696 fputs("\t.cantunwind\n", f);
27697
27698 fputs ("\t.fnend\n", f);
27699 }
27700 }
27701
27702 static bool
27703 arm_emit_tls_decoration (FILE *fp, rtx x)
27704 {
27705 enum tls_reloc reloc;
27706 rtx val;
27707
27708 val = XVECEXP (x, 0, 0);
27709 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27710
27711 output_addr_const (fp, val);
27712
27713 switch (reloc)
27714 {
27715 case TLS_GD32:
27716 fputs ("(tlsgd)", fp);
27717 break;
27718 case TLS_LDM32:
27719 fputs ("(tlsldm)", fp);
27720 break;
27721 case TLS_LDO32:
27722 fputs ("(tlsldo)", fp);
27723 break;
27724 case TLS_IE32:
27725 fputs ("(gottpoff)", fp);
27726 break;
27727 case TLS_LE32:
27728 fputs ("(tpoff)", fp);
27729 break;
27730 case TLS_DESCSEQ:
27731 fputs ("(tlsdesc)", fp);
27732 break;
27733 default:
27734 gcc_unreachable ();
27735 }
27736
27737 switch (reloc)
27738 {
27739 case TLS_GD32:
27740 case TLS_LDM32:
27741 case TLS_IE32:
27742 case TLS_DESCSEQ:
27743 fputs (" + (. - ", fp);
27744 output_addr_const (fp, XVECEXP (x, 0, 2));
27745 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27746 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27747 output_addr_const (fp, XVECEXP (x, 0, 3));
27748 fputc (')', fp);
27749 break;
27750 default:
27751 break;
27752 }
27753
27754 return TRUE;
27755 }
27756
27757 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27758
27759 static void
27760 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27761 {
27762 gcc_assert (size == 4);
27763 fputs ("\t.word\t", file);
27764 output_addr_const (file, x);
27765 fputs ("(tlsldo)", file);
27766 }
27767
27768 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27769
27770 static bool
27771 arm_output_addr_const_extra (FILE *fp, rtx x)
27772 {
27773 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27774 return arm_emit_tls_decoration (fp, x);
27775 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27776 {
27777 char label[256];
27778 int labelno = INTVAL (XVECEXP (x, 0, 0));
27779
27780 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27781 assemble_name_raw (fp, label);
27782
27783 return TRUE;
27784 }
27785 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27786 {
27787 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27788 if (GOT_PCREL)
27789 fputs ("+.", fp);
27790 fputs ("-(", fp);
27791 output_addr_const (fp, XVECEXP (x, 0, 0));
27792 fputc (')', fp);
27793 return TRUE;
27794 }
27795 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27796 {
27797 output_addr_const (fp, XVECEXP (x, 0, 0));
27798 if (GOT_PCREL)
27799 fputs ("+.", fp);
27800 fputs ("-(", fp);
27801 output_addr_const (fp, XVECEXP (x, 0, 1));
27802 fputc (')', fp);
27803 return TRUE;
27804 }
27805 else if (GET_CODE (x) == CONST_VECTOR)
27806 return arm_emit_vector_const (fp, x);
27807
27808 return FALSE;
27809 }
27810
27811 /* Output assembly for a shift instruction.
27812 SET_FLAGS determines how the instruction modifies the condition codes.
27813 0 - Do not set condition codes.
27814 1 - Set condition codes.
27815 2 - Use smallest instruction. */
27816 const char *
27817 arm_output_shift(rtx * operands, int set_flags)
27818 {
27819 char pattern[100];
27820 static const char flag_chars[3] = {'?', '.', '!'};
27821 const char *shift;
27822 HOST_WIDE_INT val;
27823 char c;
27824
27825 c = flag_chars[set_flags];
27826 shift = shift_op(operands[3], &val);
27827 if (shift)
27828 {
27829 if (val != -1)
27830 operands[2] = GEN_INT(val);
27831 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27832 }
27833 else
27834 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27835
27836 output_asm_insn (pattern, operands);
27837 return "";
27838 }
27839
27840 /* Output assembly for a WMMX immediate shift instruction. */
27841 const char *
27842 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27843 {
27844 int shift = INTVAL (operands[2]);
27845 char templ[50];
27846 machine_mode opmode = GET_MODE (operands[0]);
27847
27848 gcc_assert (shift >= 0);
27849
27850 /* Handle shift values that exceed the maximum for the element width:
27851 > 63 (for the D qualifier), > 31 (for W) or > 15 (for H). */
27852 if (((opmode == V4HImode) && (shift > 15))
27853 || ((opmode == V2SImode) && (shift > 31))
27854 || ((opmode == DImode) && (shift > 63)))
27855 {
27856 if (wror_or_wsra)
27857 {
27858 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27859 output_asm_insn (templ, operands);
27860 if (opmode == DImode)
27861 {
27862 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27863 output_asm_insn (templ, operands);
27864 }
27865 }
27866 else
27867 {
27868 /* The destination register will contain all zeros. */
27869 sprintf (templ, "wzero\t%%0");
27870 output_asm_insn (templ, operands);
27871 }
27872 return "";
27873 }
27874
27875 if ((opmode == DImode) && (shift > 32))
27876 {
27877 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27878 output_asm_insn (templ, operands);
27879 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27880 output_asm_insn (templ, operands);
27881 }
27882 else
27883 {
27884 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27885 output_asm_insn (templ, operands);
27886 }
27887 return "";
27888 }
27889
27890 /* Output assembly for a WMMX tinsr instruction. */
27891 const char *
27892 arm_output_iwmmxt_tinsr (rtx *operands)
27893 {
27894 int mask = INTVAL (operands[3]);
27895 int i;
27896 char templ[50];
27897 int units = mode_nunits[GET_MODE (operands[0])];
27898 gcc_assert ((mask & (mask - 1)) == 0);
27899 for (i = 0; i < units; ++i)
27900 {
27901 if ((mask & 0x01) == 1)
27902 {
27903 break;
27904 }
27905 mask >>= 1;
27906 }
27907 gcc_assert (i < units);
27908 {
27909 switch (GET_MODE (operands[0]))
27910 {
27911 case E_V8QImode:
27912 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27913 break;
27914 case E_V4HImode:
27915 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27916 break;
27917 case E_V2SImode:
27918 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27919 break;
27920 default:
27921 gcc_unreachable ();
27922 break;
27923 }
27924 output_asm_insn (templ, operands);
27925 }
27926 return "";
27927 }
27928
27929 /* Output a Thumb-1 casesi dispatch sequence. */
27930 const char *
27931 thumb1_output_casesi (rtx *operands)
27932 {
27933 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27934
27935 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27936
27937 switch (GET_MODE(diff_vec))
27938 {
27939 case E_QImode:
27940 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27941 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27942 case E_HImode:
27943 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27944 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27945 case E_SImode:
27946 return "bl\t%___gnu_thumb1_case_si";
27947 default:
27948 gcc_unreachable ();
27949 }
27950 }
27951
27952 /* Output a Thumb-2 casesi instruction. */
27953 const char *
27954 thumb2_output_casesi (rtx *operands)
27955 {
27956 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27957
27958 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27959
27960 output_asm_insn ("cmp\t%0, %1", operands);
27961 output_asm_insn ("bhi\t%l3", operands);
27962 switch (GET_MODE(diff_vec))
27963 {
27964 case E_QImode:
27965 return "tbb\t[%|pc, %0]";
27966 case E_HImode:
27967 return "tbh\t[%|pc, %0, lsl #1]";
27968 case E_SImode:
27969 if (flag_pic)
27970 {
27971 output_asm_insn ("adr\t%4, %l2", operands);
27972 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27973 output_asm_insn ("add\t%4, %4, %5", operands);
27974 return "bx\t%4";
27975 }
27976 else
27977 {
27978 output_asm_insn ("adr\t%4, %l2", operands);
27979 return "ldr\t%|pc, [%4, %0, lsl #2]";
27980 }
27981 default:
27982 gcc_unreachable ();
27983 }
27984 }
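
/* Illustrative example (not part of the original source): for a QImode
   (byte-offset) dispatch table the sequence emitted above has the form

     cmp  r0, <bound>
     bhi  .Ldefault
     tbb  [pc, r0]

   with the table of case-label offsets following the tbb.  HImode tables
   use tbh instead, and SImode tables use an explicit adr/ldr sequence
   (plus an add when compiling with -fpic).  */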
27985
27986 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27987 per-core tuning structs. */
27988 static int
27989 arm_issue_rate (void)
27990 {
27991 return current_tune->issue_rate;
27992 }
27993
27994 /* Return how many instructions the scheduler should look ahead to choose
27995 the best one. */
27996 static int
27997 arm_first_cycle_multipass_dfa_lookahead (void)
27998 {
27999 int issue_rate = arm_issue_rate ();
28000
28001 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28002 }
28003
28004 /* Enable modeling of L2 auto-prefetcher. */
28005 static int
28006 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28007 {
28008 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28009 }
28010
28011 const char *
28012 arm_mangle_type (const_tree type)
28013 {
28014 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28015 has to be mangled as if it is in the "std" namespace. */
28016 if (TARGET_AAPCS_BASED
28017 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28018 return "St9__va_list";
28019
28020 /* Half-precision float. */
28021 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28022 return "Dh";
28023
28024 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28025 builtin type. */
28026 if (TYPE_NAME (type) != NULL)
28027 return arm_mangle_builtin_type (type);
28028
28029 /* Use the default mangling. */
28030 return NULL;
28031 }
28032
28033 /* Order of allocation of core registers for Thumb: this allocation is
28034 written over the corresponding initial entries of the array
28035 initialized with REG_ALLOC_ORDER. We allocate all low registers
28036 first. Saving and restoring a low register is usually cheaper than
28037 using a call-clobbered high register. */
28038
28039 static const int thumb_core_reg_alloc_order[] =
28040 {
28041 3, 2, 1, 0, 4, 5, 6, 7,
28042 12, 14, 8, 9, 10, 11
28043 };
28044
28045 /* Adjust register allocation order when compiling for Thumb. */
28046
28047 void
28048 arm_order_regs_for_local_alloc (void)
28049 {
28050 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28051 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28052 if (TARGET_THUMB)
28053 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28054 sizeof (thumb_core_reg_alloc_order));
28055 }
28056
28057 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28058
28059 bool
28060 arm_frame_pointer_required (void)
28061 {
28062 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28063 return true;
28064
28065 /* If the function receives nonlocal gotos, it needs to save the frame
28066 pointer in the nonlocal_goto_save_area object. */
28067 if (cfun->has_nonlocal_label)
28068 return true;
28069
28070 /* The frame pointer is required for non-leaf APCS frames. */
28071 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28072 return true;
28073
28074 /* If we are probing the stack in the prologue, we will have a faulting
28075 instruction prior to the stack adjustment and this requires a frame
28076 pointer if we want to catch the exception using the EABI unwinder. */
28077 if (!IS_INTERRUPT (arm_current_func_type ())
28078 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28079 || flag_stack_clash_protection)
28080 && arm_except_unwind_info (&global_options) == UI_TARGET
28081 && cfun->can_throw_non_call_exceptions)
28082 {
28083 HOST_WIDE_INT size = get_frame_size ();
28084
28085 /* That's irrelevant if there is no stack adjustment. */
28086 if (size <= 0)
28087 return false;
28088
28089 /* That's relevant only if there is a stack probe. */
28090 if (crtl->is_leaf && !cfun->calls_alloca)
28091 {
28092 /* We don't have the final size of the frame so adjust. */
28093 size += 32 * UNITS_PER_WORD;
28094 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28095 return true;
28096 }
28097 else
28098 return true;
28099 }
28100
28101 return false;
28102 }
28103
28104 /* Only Thumb-1 lacks support for conditional execution, so return true if
28105 the target is not Thumb-1. */
28106 static bool
28107 arm_have_conditional_execution (void)
28108 {
28109 return !TARGET_THUMB1;
28110 }
28111
28112 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28113 static HOST_WIDE_INT
28114 arm_vector_alignment (const_tree type)
28115 {
28116 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28117
28118 if (TARGET_AAPCS_BASED)
28119 align = MIN (align, 64);
28120
28121 return align;
28122 }
28123
28124 static void
28125 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28126 {
28127 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28128 {
28129 sizes->safe_push (16);
28130 sizes->safe_push (8);
28131 }
28132 }
28133
28134 static bool
28135 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28136 {
28137 /* Vectors which aren't in packed structures will not be less aligned than
28138 the natural alignment of their element type, so this is safe. */
28139 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28140 return !is_packed;
28141
28142 return default_builtin_vector_alignment_reachable (type, is_packed);
28143 }
28144
28145 static bool
28146 arm_builtin_support_vector_misalignment (machine_mode mode,
28147 const_tree type, int misalignment,
28148 bool is_packed)
28149 {
28150 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28151 {
28152 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28153
28154 if (is_packed)
28155 return align == 1;
28156
28157 /* If the misalignment is unknown, we should be able to handle the access
28158 so long as it is not to a member of a packed data structure. */
28159 if (misalignment == -1)
28160 return true;
28161
28162 /* Return true if the misalignment is a multiple of the natural alignment
28163 of the vector's element type. This is probably always going to be
28164 true in practice, since we've already established that this isn't a
28165 packed access. */
28166 return ((misalignment % align) == 0);
28167 }
28168
28169 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28170 is_packed);
28171 }
28172
28173 static void
28174 arm_conditional_register_usage (void)
28175 {
28176 int regno;
28177
28178 if (TARGET_THUMB1 && optimize_size)
28179 {
28180 /* When optimizing for size on Thumb-1, it's better not
28181 to use the HI regs, because of the overhead of
28182 stacking them. */
28183 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28184 fixed_regs[regno] = call_used_regs[regno] = 1;
28185 }
28186
28187 /* The link register can be clobbered by any branch insn,
28188 but we have no way to track that at present, so mark
28189 it as unavailable. */
28190 if (TARGET_THUMB1)
28191 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28192
28193 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28194 {
28195 /* VFPv3 registers are disabled when earlier VFP
28196 versions are selected due to the definition of
28197 LAST_VFP_REGNUM. */
28198 for (regno = FIRST_VFP_REGNUM;
28199 regno <= LAST_VFP_REGNUM; ++ regno)
28200 {
28201 fixed_regs[regno] = 0;
28202 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28203 || regno >= FIRST_VFP_REGNUM + 32;
28204 }
28205 }
28206
28207 if (TARGET_REALLY_IWMMXT)
28208 {
28209 regno = FIRST_IWMMXT_GR_REGNUM;
28210 /* The 2002/10/09 revision of the XScale ABI has wCG0
28211 and wCG1 as call-preserved registers. The 2002/11/21
28212 revision changed this so that all wCG registers are
28213 scratch registers. */
28214 for (regno = FIRST_IWMMXT_GR_REGNUM;
28215 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28216 fixed_regs[regno] = 0;
28217 /* The XScale ABI has wR0 - wR9 as scratch registers,
28218 the rest as call-preserved registers. */
28219 for (regno = FIRST_IWMMXT_REGNUM;
28220 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28221 {
28222 fixed_regs[regno] = 0;
28223 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28224 }
28225 }
28226
28227 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28228 {
28229 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28230 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28231 }
28232 else if (TARGET_APCS_STACK)
28233 {
28234 fixed_regs[10] = 1;
28235 call_used_regs[10] = 1;
28236 }
28237 /* -mcaller-super-interworking reserves r11 for calls to
28238 _interwork_r11_call_via_rN(). Making the register global
28239 is an easy way of ensuring that it remains valid for all
28240 calls. */
28241 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28242 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28243 {
28244 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28245 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28246 if (TARGET_CALLER_INTERWORKING)
28247 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28248 }
28249 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28250 }
28251
28252 static reg_class_t
28253 arm_preferred_rename_class (reg_class_t rclass)
28254 {
28255 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28256 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
28257 so that code size can be reduced. */
28258 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28259 return LO_REGS;
28260 else
28261 return NO_REGS;
28262 }
28263
28264 /* Compute the attribute "length" of insn "*push_multi".
28265 So this function MUST be kept in sync with that insn pattern. */
28266 int
28267 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28268 {
28269 int i, regno, hi_reg;
28270 int num_saves = XVECLEN (parallel_op, 0);
28271
28272 /* ARM mode. */
28273 if (TARGET_ARM)
28274 return 4;
28275 /* Thumb1 mode. */
28276 if (TARGET_THUMB1)
28277 return 2;
28278
28279 /* Thumb2 mode. */
28280 regno = REGNO (first_op);
28281 /* For PUSH/STM in Thumb2 mode, we can use a 16-bit encoding if the register
28282 list fits in 8 bits. Normally this means all registers in the list must be
28283 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use a 32-bit
28284 encoding. There is one exception: for PUSH, LR (a HI_REG) can be used
28285 with the 16-bit encoding. */
28286 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28287 for (i = 1; i < num_saves && !hi_reg; i++)
28288 {
28289 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28290 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28291 }
28292
28293 if (!hi_reg)
28294 return 2;
28295 return 4;
28296 }
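
/* Worked examples (not part of the original source), Thumb-2:

     push {r0-r7, lr}   all LO_REGS plus LR, 16-bit encoding, length 2
     push {r4, r8}      r8 is a HI_REG other than LR, 32-bit, length 4

   In ARM state the length is always 4, in Thumb-1 always 2.  */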
28297
28298 /* Compute the attribute "length" of an insn. Currently, this function is used
28299 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28300 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28301 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28302 true if OPERANDS contains an insn which explicitly updates the base register. */
28303
28304 int
28305 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28306 {
28307 /* ARM mode. */
28308 if (TARGET_ARM)
28309 return 4;
28310 /* Thumb1 mode. */
28311 if (TARGET_THUMB1)
28312 return 2;
28313
28314 rtx parallel_op = operands[0];
28315 /* Start at the index of the last element of the PARALLEL. */
28316 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28317 /* The base register number. */
28318 unsigned regno = REGNO (operands[1]);
28319 /* Skip the return and write-back patterns.
28320 We only need the register pop patterns for the later analysis. */
28321 unsigned first_indx = 0;
28322 first_indx += return_pc ? 1 : 0;
28323 first_indx += write_back_p ? 1 : 0;
28324
28325 /* A pop operation can be done through LDM or POP. If the base register is SP
28326 and write-back is used, then an LDM is an alias of POP. */
28327 bool pop_p = (regno == SP_REGNUM && write_back_p);
28328 bool ldm_p = !pop_p;
28329
28330 /* Check base register for LDM. */
28331 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28332 return 4;
28333
28334 /* Check each register in the list. */
28335 for (; indx >= first_indx; indx--)
28336 {
28337 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28338 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28339 comment in arm_attr_length_push_multi. */
28340 if (REGNO_REG_CLASS (regno) == HI_REGS
28341 && (regno != PC_REGNUM || ldm_p))
28342 return 4;
28343 }
28344
28345 return 2;
28346 }
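
/* Worked examples (not part of the original source), Thumb-2:

     pop  {r4-r7, pc}       SP base with write-back, so the 16-bit POP
                            encoding may be used even with PC: length 2
     ldmia r8!, {r4, r5}    high base register forces the 32-bit
                            encoding: length 4  */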
28347
28348 /* Compute the number of instructions emitted by output_move_double. */
28349 int
28350 arm_count_output_move_double_insns (rtx *operands)
28351 {
28352 int count;
28353 rtx ops[2];
28354 /* output_move_double may modify the operands array, so call it
28355 here on a copy of the array. */
28356 ops[0] = operands[0];
28357 ops[1] = operands[1];
28358 output_move_double (ops, false, &count);
28359 return count;
28360 }
28361
28362 int
28363 vfp3_const_double_for_fract_bits (rtx operand)
28364 {
28365 REAL_VALUE_TYPE r0;
28366
28367 if (!CONST_DOUBLE_P (operand))
28368 return 0;
28369
28370 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28371 if (exact_real_inverse (DFmode, &r0)
28372 && !REAL_VALUE_NEGATIVE (r0))
28373 {
28374 if (exact_real_truncate (DFmode, &r0))
28375 {
28376 HOST_WIDE_INT value = real_to_integer (&r0);
28377 value = value & 0xffffffff;
28378 if ((value != 0) && ( (value & (value - 1)) == 0))
28379 {
28380 int ret = exact_log2 (value);
28381 gcc_assert (IN_RANGE (ret, 0, 31));
28382 return ret;
28383 }
28384 }
28385 }
28386 return 0;
28387 }
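
/* Illustrative standalone sketch (not part of the original source): the
   routine above accepts constants whose reciprocal is an exact power of
   two, e.g. 0.125 -> 1/0.125 == 8 == 2^3, giving 3 fraction bits.  A
   hypothetical C helper with the same intent:

     #include <math.h>

     static int fract_bits_of (double x)
     {
       int e;
       if (x <= 0.0)
         return 0;
       if (frexp (1.0 / x, &e) != 0.5)
         return 0;             // 1/x is not an exact power of two
       return (e - 1 >= 0 && e - 1 <= 31) ? e - 1 : 0;
     }
   */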
28388
28389 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28390 log2 is in [1, 32], return that log2. Otherwise return -1.
28391 This is used in the patterns for vcvt.s32.f32 floating-point to
28392 fixed-point conversions. */
28393
28394 int
28395 vfp3_const_double_for_bits (rtx x)
28396 {
28397 const REAL_VALUE_TYPE *r;
28398
28399 if (!CONST_DOUBLE_P (x))
28400 return -1;
28401
28402 r = CONST_DOUBLE_REAL_VALUE (x);
28403
28404 if (REAL_VALUE_NEGATIVE (*r)
28405 || REAL_VALUE_ISNAN (*r)
28406 || REAL_VALUE_ISINF (*r)
28407 || !real_isinteger (r, SFmode))
28408 return -1;
28409
28410 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28411
28412 /* The exact_log2 above will have returned -1 if this is
28413 not an exact log2. */
28414 if (!IN_RANGE (hwint, 1, 32))
28415 return -1;
28416
28417 return hwint;
28418 }
28419
28420 \f
28421 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28422
28423 static void
28424 arm_pre_atomic_barrier (enum memmodel model)
28425 {
28426 if (need_atomic_barrier_p (model, true))
28427 emit_insn (gen_memory_barrier ());
28428 }
28429
28430 static void
28431 arm_post_atomic_barrier (enum memmodel model)
28432 {
28433 if (need_atomic_barrier_p (model, false))
28434 emit_insn (gen_memory_barrier ());
28435 }
28436
28437 /* Emit the load-exclusive and store-exclusive instructions.
28438 Use acquire and release versions if necessary. */
28439
28440 static void
28441 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28442 {
28443 rtx (*gen) (rtx, rtx);
28444
28445 if (acq)
28446 {
28447 switch (mode)
28448 {
28449 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28450 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28451 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28452 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28453 default:
28454 gcc_unreachable ();
28455 }
28456 }
28457 else
28458 {
28459 switch (mode)
28460 {
28461 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28462 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28463 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28464 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28465 default:
28466 gcc_unreachable ();
28467 }
28468 }
28469
28470 emit_insn (gen (rval, mem));
28471 }
28472
28473 static void
28474 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28475 rtx mem, bool rel)
28476 {
28477 rtx (*gen) (rtx, rtx, rtx);
28478
28479 if (rel)
28480 {
28481 switch (mode)
28482 {
28483 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28484 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28485 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28486 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28487 default:
28488 gcc_unreachable ();
28489 }
28490 }
28491 else
28492 {
28493 switch (mode)
28494 {
28495 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28496 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28497 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28498 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28499 default:
28500 gcc_unreachable ();
28501 }
28502 }
28503
28504 emit_insn (gen (bval, rval, mem));
28505 }
28506
28507 /* Mark the previous jump instruction as unlikely. */
28508
28509 static void
28510 emit_unlikely_jump (rtx insn)
28511 {
28512 rtx_insn *jump = emit_jump_insn (insn);
28513 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28514 }
28515
28516 /* Expand a compare and swap pattern. */
28517
28518 void
28519 arm_expand_compare_and_swap (rtx operands[])
28520 {
28521 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28522 machine_mode mode;
28523 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28524
28525 bval = operands[0];
28526 rval = operands[1];
28527 mem = operands[2];
28528 oldval = operands[3];
28529 newval = operands[4];
28530 is_weak = operands[5];
28531 mod_s = operands[6];
28532 mod_f = operands[7];
28533 mode = GET_MODE (mem);
28534
28535 /* Normally the succ memory model must be stronger than fail, but in the
28536 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28537 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28538
28539 if (TARGET_HAVE_LDACQ
28540 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28541 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28542 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28543
28544 switch (mode)
28545 {
28546 case E_QImode:
28547 case E_HImode:
28548 /* For narrow modes, we're going to perform the comparison in SImode,
28549 so do the zero-extension now. */
28550 rval = gen_reg_rtx (SImode);
28551 oldval = convert_modes (SImode, mode, oldval, true);
28552 /* FALLTHRU */
28553
28554 case E_SImode:
28555 /* Force the value into a register if needed. We waited until after
28556 the zero-extension above to do this properly. */
28557 if (!arm_add_operand (oldval, SImode))
28558 oldval = force_reg (SImode, oldval);
28559 break;
28560
28561 case E_DImode:
28562 if (!cmpdi_operand (oldval, mode))
28563 oldval = force_reg (mode, oldval);
28564 break;
28565
28566 default:
28567 gcc_unreachable ();
28568 }
28569
28570 if (TARGET_THUMB1)
28571 {
28572 switch (mode)
28573 {
28574 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28575 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28576 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28577 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28578 default:
28579 gcc_unreachable ();
28580 }
28581 }
28582 else
28583 {
28584 switch (mode)
28585 {
28586 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28587 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28588 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28589 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28590 default:
28591 gcc_unreachable ();
28592 }
28593 }
28594
28595 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28596 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28597
28598 if (mode == QImode || mode == HImode)
28599 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28600
28601 /* In all cases, we arrange for success to be signaled by Z being set.
28602 This arrangement allows the boolean result to be used directly
28603 in a subsequent branch, post optimization. For Thumb-1 targets, the
28604 boolean negation of the result is also stored in bval because the Thumb-1
28605 backend lacks dependency tracking for the CC flag, flag-setting not
28606 being represented at the RTL level. */
28607 if (TARGET_THUMB1)
28608 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28609 else
28610 {
28611 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28612 emit_insn (gen_rtx_SET (bval, x));
28613 }
28614 }
28615
28616 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28617 another memory store between the load-exclusive and store-exclusive can
28618 reset the monitor from Exclusive to Open state. This means we must wait
28619 until after reload to split the pattern, lest we get a register spill in
28620 the middle of the atomic sequence. Success of the compare and swap is
28621 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28622 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28623 by the atomic_compare_and_swapmode standard pattern in operand 0). */
28624
28625 void
28626 arm_split_compare_and_swap (rtx operands[])
28627 {
28628 rtx rval, mem, oldval, newval, neg_bval;
28629 machine_mode mode;
28630 enum memmodel mod_s, mod_f;
28631 bool is_weak;
28632 rtx_code_label *label1, *label2;
28633 rtx x, cond;
28634
28635 rval = operands[1];
28636 mem = operands[2];
28637 oldval = operands[3];
28638 newval = operands[4];
28639 is_weak = (operands[5] != const0_rtx);
28640 mod_s = memmodel_from_int (INTVAL (operands[6]));
28641 mod_f = memmodel_from_int (INTVAL (operands[7]));
28642 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28643 mode = GET_MODE (mem);
28644
28645 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28646
28647 bool use_acquire = TARGET_HAVE_LDACQ
28648 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28649 || is_mm_release (mod_s));
28650
28651 bool use_release = TARGET_HAVE_LDACQ
28652 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28653 || is_mm_acquire (mod_s));
28654
28655 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28656 a full barrier is emitted after the store-release. */
28657 if (is_armv8_sync)
28658 use_acquire = false;
28659
28660 /* Checks whether a barrier is needed and emits one accordingly. */
28661 if (!(use_acquire || use_release))
28662 arm_pre_atomic_barrier (mod_s);
28663
28664 label1 = NULL;
28665 if (!is_weak)
28666 {
28667 label1 = gen_label_rtx ();
28668 emit_label (label1);
28669 }
28670 label2 = gen_label_rtx ();
28671
28672 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28673
28674 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28675 as required to communicate with arm_expand_compare_and_swap. */
28676 if (TARGET_32BIT)
28677 {
28678 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28679 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28680 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28681 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28682 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28683 }
28684 else
28685 {
28686 emit_move_insn (neg_bval, const1_rtx);
28687 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28688 if (thumb1_cmpneg_operand (oldval, SImode))
28689 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28690 label2, cond));
28691 else
28692 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28693 }
28694
28695 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28696
28697 /* Weak or strong, we want EQ to be true for success, so that we
28698 match the flags that we got from the compare above. */
28699 if (TARGET_32BIT)
28700 {
28701 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28702 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28703 emit_insn (gen_rtx_SET (cond, x));
28704 }
28705
28706 if (!is_weak)
28707 {
28708 /* Z is set to boolean value of !neg_bval, as required to communicate
28709 with arm_expand_compare_and_swap. */
28710 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28711 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28712 }
28713
28714 if (!is_mm_relaxed (mod_f))
28715 emit_label (label2);
28716
28717 /* Checks whether a barrier is needed and emits one accordingly. */
28718 if (is_armv8_sync
28719 || !(use_acquire || use_release))
28720 arm_post_atomic_barrier (mod_s);
28721
28722 if (is_mm_relaxed (mod_f))
28723 emit_label (label2);
28724 }
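
/* Illustrative only: a minimal C-level sketch, not part of the backend, of
   the semantics the splitter above implements.  A strong compare-and-swap
   is a weak one wrapped in a retry loop, which corresponds to the loop back
   to label1 emitted for the !is_weak case.  It uses the GCC __atomic
   builtins; the function name is made up for the example.  */

#include <stdbool.h>
#include <stdint.h>

static bool
example_strong_cas (uint32_t *mem, uint32_t *expected, uint32_t desired)
{
  for (;;)
    {
      uint32_t old = *expected;
      if (__atomic_compare_exchange_n (mem, expected, desired,
                                       /* weak */ true,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
        return true;            /* The store-exclusive succeeded.  */
      if (*expected != old)
        return false;           /* Genuine value mismatch.  */
      /* Spurious failure (e.g. the exclusive monitor was reset between the
         load-exclusive and the store-exclusive): retry, just as the RTL
         branches back to label1.  */
    }
}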
28725
28726 /* Split an atomic operation pattern.  The operation is given by CODE and is
28727    one of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a
28728    nand operation).  The operation is performed on the content at MEM and on
28729    VALUE following the memory model MODEL_RTX.  The content at MEM before and
28730    after the operation is returned in OLD_OUT and NEW_OUT respectively, while
28731    the success of the operation is returned in COND.  Using a scratch register
28732    or an operand register for these determines what result is returned for
28733    that pattern.  */
28734
28735 void
28736 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28737 rtx value, rtx model_rtx, rtx cond)
28738 {
28739 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28740 machine_mode mode = GET_MODE (mem);
28741 machine_mode wmode = (mode == DImode ? DImode : SImode);
28742 rtx_code_label *label;
28743 bool all_low_regs, bind_old_new;
28744 rtx x;
28745
28746 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28747
28748 bool use_acquire = TARGET_HAVE_LDACQ
28749 && !(is_mm_relaxed (model) || is_mm_consume (model)
28750 || is_mm_release (model));
28751
28752 bool use_release = TARGET_HAVE_LDACQ
28753 && !(is_mm_relaxed (model) || is_mm_consume (model)
28754 || is_mm_acquire (model));
28755
28756 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28757 a full barrier is emitted after the store-release. */
28758 if (is_armv8_sync)
28759 use_acquire = false;
28760
28761 /* Checks whether a barrier is needed and emits one accordingly. */
28762 if (!(use_acquire || use_release))
28763 arm_pre_atomic_barrier (model);
28764
28765 label = gen_label_rtx ();
28766 emit_label (label);
28767
28768 if (new_out)
28769 new_out = gen_lowpart (wmode, new_out);
28770 if (old_out)
28771 old_out = gen_lowpart (wmode, old_out);
28772 else
28773 old_out = new_out;
28774 value = simplify_gen_subreg (wmode, value, mode, 0);
28775
28776 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28777
28778 /* Does the operation require destination and first operand to use the same
28779 register? This is decided by register constraints of relevant insn
28780 patterns in thumb1.md. */
28781 gcc_assert (!new_out || REG_P (new_out));
28782 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28783 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28784 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28785 bind_old_new =
28786 (TARGET_THUMB1
28787 && code != SET
28788 && code != MINUS
28789 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28790
28791 /* We want to return the old value while putting the result of the operation
28792 in the same register as the old value so copy the old value over to the
28793 destination register and use that register for the operation. */
28794 if (old_out && bind_old_new)
28795 {
28796 emit_move_insn (new_out, old_out);
28797 old_out = new_out;
28798 }
28799
28800 switch (code)
28801 {
28802 case SET:
28803 new_out = value;
28804 break;
28805
28806 case NOT:
28807 x = gen_rtx_AND (wmode, old_out, value);
28808 emit_insn (gen_rtx_SET (new_out, x));
28809 x = gen_rtx_NOT (wmode, new_out);
28810 emit_insn (gen_rtx_SET (new_out, x));
28811 break;
28812
28813 case MINUS:
28814 if (CONST_INT_P (value))
28815 {
28816 value = GEN_INT (-INTVAL (value));
28817 code = PLUS;
28818 }
28819 /* FALLTHRU */
28820
28821 case PLUS:
28822 if (mode == DImode)
28823 {
28824 /* DImode plus/minus need to clobber flags. */
28825 /* The adddi3 and subdi3 patterns are incorrectly written so that
28826 they require matching operands, even when we could easily support
28827 three operands. Thankfully, this can be fixed up post-splitting,
28828 as the individual add+adc patterns do accept three operands and
28829 post-reload cprop can make these moves go away. */
28830 emit_move_insn (new_out, old_out);
28831 if (code == PLUS)
28832 x = gen_adddi3 (new_out, new_out, value);
28833 else
28834 x = gen_subdi3 (new_out, new_out, value);
28835 emit_insn (x);
28836 break;
28837 }
28838 /* FALLTHRU */
28839
28840 default:
28841 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28842 emit_insn (gen_rtx_SET (new_out, x));
28843 break;
28844 }
28845
28846 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28847 use_release);
28848
28849 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28850 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28851
28852 /* Checks whether a barrier is needed and emits one accordingly. */
28853 if (is_armv8_sync
28854 || !(use_acquire || use_release))
28855 arm_post_atomic_barrier (model);
28856 }
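
/* Illustrative only: a C-level model, not part of the backend, of the
   load-exclusive/operate/store-exclusive retry loop generated above, shown
   for the NOT (nand) case where new = ~(old & value).  The store-exclusive
   is modelled by a weak compare-exchange; the function name is made up.  */

#include <stdint.h>

static uint32_t
example_atomic_nand (uint32_t *mem, uint32_t value)
{
  uint32_t old = __atomic_load_n (mem, __ATOMIC_RELAXED);
  uint32_t new_val;
  do
    new_val = ~(old & value);   /* Matches the AND-then-NOT sequence.  */
  while (!__atomic_compare_exchange_n (mem, &old, new_val,
                                       /* weak */ true,
                                       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
  return old;                   /* OLD_OUT: the content at MEM before the op.  */
}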
28857 \f
28858 #define MAX_VECT_LEN 16
28859
28860 struct expand_vec_perm_d
28861 {
28862 rtx target, op0, op1;
28863 vec_perm_indices perm;
28864 machine_mode vmode;
28865 bool one_vector_p;
28866 bool testing_p;
28867 };
28868
28869 /* Generate a variable permutation. */
28870
28871 static void
28872 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28873 {
28874 machine_mode vmode = GET_MODE (target);
28875 bool one_vector_p = rtx_equal_p (op0, op1);
28876
28877 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28878 gcc_checking_assert (GET_MODE (op0) == vmode);
28879 gcc_checking_assert (GET_MODE (op1) == vmode);
28880 gcc_checking_assert (GET_MODE (sel) == vmode);
28881 gcc_checking_assert (TARGET_NEON);
28882
28883 if (one_vector_p)
28884 {
28885 if (vmode == V8QImode)
28886 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28887 else
28888 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28889 }
28890 else
28891 {
28892 rtx pair;
28893
28894 if (vmode == V8QImode)
28895 {
28896 pair = gen_reg_rtx (V16QImode);
28897 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28898 pair = gen_lowpart (TImode, pair);
28899 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28900 }
28901 else
28902 {
28903 pair = gen_reg_rtx (OImode);
28904 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28905 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28906 }
28907 }
28908 }
28909
28910 void
28911 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28912 {
28913 machine_mode vmode = GET_MODE (target);
28914 unsigned int nelt = GET_MODE_NUNITS (vmode);
28915 bool one_vector_p = rtx_equal_p (op0, op1);
28916 rtx mask;
28917
28918 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28919 numbering of elements for big-endian, we must reverse the order. */
28920 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28921
28922 /* The VTBL instruction does not use a modulo index, so we must take care
28923 of that ourselves. */
28924 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28925 mask = gen_const_vec_duplicate (vmode, mask);
28926 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28927
28928 arm_expand_vec_perm_1 (target, op0, op1, sel);
28929 }
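
/* Illustrative only: a scalar C model, not part of the backend, of the
   variable byte permutation expanded above.  Each selector element is
   masked to NELT - 1 (one input) or 2 * NELT - 1 (two inputs) before the
   table lookup, matching the AND emitted before arm_expand_vec_perm_1.  */

static void
example_vec_perm_bytes (unsigned char *target, const unsigned char *op0,
                        const unsigned char *op1, const unsigned char *sel,
                        unsigned int nelt, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int idx = sel[i] & mask;
      target[i] = idx < nelt ? op0[idx] : op1[idx - nelt];
    }
}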
28930
28931 /* Map lane ordering between architectural lane order, and GCC lane order,
28932 taking into account ABI. See comment above output_move_neon for details. */
28933
28934 static int
28935 neon_endian_lane_map (machine_mode mode, int lane)
28936 {
28937 if (BYTES_BIG_ENDIAN)
28938 {
28939 int nelems = GET_MODE_NUNITS (mode);
28940 /* Reverse lane order. */
28941 lane = (nelems - 1 - lane);
28942 /* Reverse D register order, to match ABI. */
28943 if (GET_MODE_SIZE (mode) == 16)
28944 lane = lane ^ (nelems / 2);
28945 }
28946 return lane;
28947 }
28948
28949 /* Some permutations index into pairs of vectors, this is a helper function
28950 to map indexes into those pairs of vectors. */
28951
28952 static int
28953 neon_pair_endian_lane_map (machine_mode mode, int lane)
28954 {
28955 int nelem = GET_MODE_NUNITS (mode);
28956 if (BYTES_BIG_ENDIAN)
28957 lane =
28958 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28959 return lane;
28960 }
28961
28962 /* Generate or test for an insn that supports a constant permutation. */
28963
28964 /* Recognize patterns for the VUZP insns. */
28965
28966 static bool
28967 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28968 {
28969 unsigned int i, odd, mask, nelt = d->perm.length ();
28970 rtx out0, out1, in0, in1;
28971 rtx (*gen)(rtx, rtx, rtx, rtx);
28972 int first_elem;
28973 int swap_nelt;
28974
28975 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28976 return false;
28977
28978   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28979      big-endian pattern on 64-bit vectors, so we correct for that.  */
28980 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28981 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28982
28983 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28984
28985 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28986 odd = 0;
28987 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28988 odd = 1;
28989 else
28990 return false;
28991 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28992
28993 for (i = 0; i < nelt; i++)
28994 {
28995 unsigned elt =
28996 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28997 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28998 return false;
28999 }
29000
29001 /* Success! */
29002 if (d->testing_p)
29003 return true;
29004
29005 switch (d->vmode)
29006 {
29007 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29008 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29009 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29010 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29011 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29012 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29013 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29014 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29015 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29016 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29017 default:
29018 gcc_unreachable ();
29019 }
29020
29021 in0 = d->op0;
29022 in1 = d->op1;
29023 if (swap_nelt != 0)
29024 std::swap (in0, in1);
29025
29026 out0 = d->target;
29027 out1 = gen_reg_rtx (d->vmode);
29028 if (odd)
29029 std::swap (out0, out1);
29030
29031 emit_insn (gen (out0, in0, in1, out1));
29032 return true;
29033 }
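
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above.  VUZP picks the even (ODD == 0)
   or odd (ODD == 1) numbered elements of the concatenation of the two
   inputs.  */

static void
example_vuzp_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int odd)
{
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int src = 2 * i + odd;   /* Index into in0 followed by in1.  */
      out[i] = src < nelt ? in0[src] : in1[src - nelt];
    }
}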
29034
29035 /* Recognize patterns for the VZIP insns. */
29036
29037 static bool
29038 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29039 {
29040 unsigned int i, high, mask, nelt = d->perm.length ();
29041 rtx out0, out1, in0, in1;
29042 rtx (*gen)(rtx, rtx, rtx, rtx);
29043 int first_elem;
29044 bool is_swapped;
29045
29046 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29047 return false;
29048
29049 is_swapped = BYTES_BIG_ENDIAN;
29050
29051 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29052
29053 high = nelt / 2;
29054 if (first_elem == neon_endian_lane_map (d->vmode, high))
29055 ;
29056 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29057 high = 0;
29058 else
29059 return false;
29060 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29061
29062 for (i = 0; i < nelt / 2; i++)
29063 {
29064 unsigned elt =
29065 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29066 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29067 != elt)
29068 return false;
29069 elt =
29070 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29071 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29072 != elt)
29073 return false;
29074 }
29075
29076 /* Success! */
29077 if (d->testing_p)
29078 return true;
29079
29080 switch (d->vmode)
29081 {
29082 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29083 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29084 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29085 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29086 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29087 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29088 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29089 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29090 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29091 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29092 default:
29093 gcc_unreachable ();
29094 }
29095
29096 in0 = d->op0;
29097 in1 = d->op1;
29098 if (is_swapped)
29099 std::swap (in0, in1);
29100
29101 out0 = d->target;
29102 out1 = gen_reg_rtx (d->vmode);
29103 if (high)
29104 std::swap (out0, out1);
29105
29106 emit_insn (gen (out0, in0, in1, out1));
29107 return true;
29108 }
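
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above.  VZIP interleaves the low
   (HIGH == 0) or high (HIGH == NELT / 2) halves of the two inputs.  */

static void
example_vzip_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int high)
{
  for (unsigned int i = 0; i < nelt / 2; i++)
    {
      out[2 * i] = in0[i + high];
      out[2 * i + 1] = in1[i + high];
    }
}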
29109
29110 /* Recognize patterns for the VREV insns. */
29111
29112 static bool
29113 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29114 {
29115 unsigned int i, j, diff, nelt = d->perm.length ();
29116 rtx (*gen)(rtx, rtx);
29117
29118 if (!d->one_vector_p)
29119 return false;
29120
29121 diff = d->perm[0];
29122 switch (diff)
29123 {
29124 case 7:
29125 switch (d->vmode)
29126 {
29127 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29128 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29129 default:
29130 return false;
29131 }
29132 break;
29133 case 3:
29134 switch (d->vmode)
29135 {
29136 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29137 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29138 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29139 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29140 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29141 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29142 default:
29143 return false;
29144 }
29145 break;
29146 case 1:
29147 switch (d->vmode)
29148 {
29149 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29150 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29151 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29152 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29153 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29154 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29155 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29156 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29157 default:
29158 return false;
29159 }
29160 break;
29161 default:
29162 return false;
29163 }
29164
29165 for (i = 0; i < nelt ; i += diff + 1)
29166 for (j = 0; j <= diff; j += 1)
29167 {
29168 	/* This is guaranteed to be true because diff is
29169 	   7, 3 or 1, so we always have enough elements left
29170 	   to generate this.  Getting a vector mask with a
29171 	   value of diff other than these implies that
29172 	   something has gone wrong by the time we get here.  */
29173 gcc_assert (i + j < nelt);
29174 if (d->perm[i + j] != i + diff - j)
29175 return false;
29176 }
29177
29178 /* Success! */
29179 if (d->testing_p)
29180 return true;
29181
29182 emit_insn (gen (d->target, d->op0));
29183 return true;
29184 }
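
/* Illustrative only: a scalar model, not part of the backend, of the
   reversal matched above.  VREV reverses the elements within consecutive
   groups of DIFF + 1 elements, DIFF being 7, 3 or 1 as checked above.  */

static void
example_vrev_select (unsigned char *out, const unsigned char *in,
                     unsigned int nelt, unsigned int diff)
{
  for (unsigned int i = 0; i < nelt; i += diff + 1)
    for (unsigned int j = 0; j <= diff; j++)
      out[i + j] = in[i + diff - j];
}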
29185
29186 /* Recognize patterns for the VTRN insns. */
29187
29188 static bool
29189 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29190 {
29191 unsigned int i, odd, mask, nelt = d->perm.length ();
29192 rtx out0, out1, in0, in1;
29193 rtx (*gen)(rtx, rtx, rtx, rtx);
29194
29195 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29196 return false;
29197
29198 /* Note that these are little-endian tests. Adjust for big-endian later. */
29199 if (d->perm[0] == 0)
29200 odd = 0;
29201 else if (d->perm[0] == 1)
29202 odd = 1;
29203 else
29204 return false;
29205 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29206
29207 for (i = 0; i < nelt; i += 2)
29208 {
29209 if (d->perm[i] != i + odd)
29210 return false;
29211 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29212 return false;
29213 }
29214
29215 /* Success! */
29216 if (d->testing_p)
29217 return true;
29218
29219 switch (d->vmode)
29220 {
29221 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29222 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29223 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29224 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29225 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29226 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29227 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29228 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29229 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29230 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29231 default:
29232 gcc_unreachable ();
29233 }
29234
29235 in0 = d->op0;
29236 in1 = d->op1;
29237 if (BYTES_BIG_ENDIAN)
29238 {
29239 std::swap (in0, in1);
29240 odd = !odd;
29241 }
29242
29243 out0 = d->target;
29244 out1 = gen_reg_rtx (d->vmode);
29245 if (odd)
29246 std::swap (out0, out1);
29247
29248 emit_insn (gen (out0, in0, in1, out1));
29249 return true;
29250 }
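
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above for two distinct inputs.  VTRN
   transposes the even (ODD == 0) or odd (ODD == 1) lane of each pair
   between the two inputs.  */

static void
example_vtrn_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int odd)
{
  for (unsigned int i = 0; i < nelt; i += 2)
    {
      out[i] = in0[i + odd];
      out[i + 1] = in1[i + odd];
    }
}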
29251
29252 /* Recognize patterns for the VEXT insns. */
29253
29254 static bool
29255 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29256 {
29257 unsigned int i, nelt = d->perm.length ();
29258 rtx (*gen) (rtx, rtx, rtx, rtx);
29259 rtx offset;
29260
29261 unsigned int location;
29262
29263 unsigned int next = d->perm[0] + 1;
29264
29265 /* TODO: Handle GCC's numbering of elements for big-endian. */
29266 if (BYTES_BIG_ENDIAN)
29267 return false;
29268
29269 /* Check if the extracted indexes are increasing by one. */
29270 for (i = 1; i < nelt; next++, i++)
29271 {
29272 /* If we hit the most significant element of the 2nd vector in
29273 the previous iteration, no need to test further. */
29274 if (next == 2 * nelt)
29275 return false;
29276
29277 /* If we are operating on only one vector: it could be a
29278 rotation. If there are only two elements of size < 64, let
29279 arm_evpc_neon_vrev catch it. */
29280 if (d->one_vector_p && (next == nelt))
29281 {
29282 if ((nelt == 2) && (d->vmode != V2DImode))
29283 return false;
29284 else
29285 next = 0;
29286 }
29287
29288 if (d->perm[i] != next)
29289 return false;
29290 }
29291
29292 location = d->perm[0];
29293
29294 switch (d->vmode)
29295 {
29296 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29297 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29298 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29299 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29300 case E_V2SImode: gen = gen_neon_vextv2si; break;
29301 case E_V4SImode: gen = gen_neon_vextv4si; break;
29302 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29303 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29304 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29305 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29306 case E_V2DImode: gen = gen_neon_vextv2di; break;
29307 default:
29308 return false;
29309 }
29310
29311 /* Success! */
29312 if (d->testing_p)
29313 return true;
29314
29315 offset = GEN_INT (location);
29316 emit_insn (gen (d->target, d->op0, d->op1, offset));
29317 return true;
29318 }
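
/* Illustrative only: a scalar, little-endian model, not part of the
   backend, of the selection matched above for two distinct inputs.  VEXT
   extracts a window of NELT consecutive elements starting at LOCATION
   from the concatenation of the two inputs.  */

static void
example_vext_select (unsigned int *out, const unsigned int *in0,
                     const unsigned int *in1, unsigned int nelt,
                     unsigned int location)
{
  for (unsigned int i = 0; i < nelt; i++)
    {
      unsigned int src = location + i;  /* Always below 2 * NELT here.  */
      out[i] = src < nelt ? in0[src] : in1[src - nelt];
    }
}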
29319
29320 /* The NEON VTBL instruction is a fully variable permutation that's even
29321 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29322 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29323 can do slightly better by expanding this as a constant where we don't
29324 have to apply a mask. */
29325
29326 static bool
29327 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29328 {
29329 rtx rperm[MAX_VECT_LEN], sel;
29330 machine_mode vmode = d->vmode;
29331 unsigned int i, nelt = d->perm.length ();
29332
29333 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29334 numbering of elements for big-endian, we must reverse the order. */
29335 if (BYTES_BIG_ENDIAN)
29336 return false;
29337
29338 if (d->testing_p)
29339 return true;
29340
29341   /* Generic code will try constant permutation twice: once with the
29342      original mode and again with the elements lowered to QImode.
29343      So wait, and don't do the selector expansion ourselves.  */
29344 if (vmode != V8QImode && vmode != V16QImode)
29345 return false;
29346
29347 for (i = 0; i < nelt; ++i)
29348 rperm[i] = GEN_INT (d->perm[i]);
29349 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29350 sel = force_reg (vmode, sel);
29351
29352 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29353 return true;
29354 }
29355
29356 static bool
29357 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29358 {
29359 /* Check if the input mask matches vext before reordering the
29360 operands. */
29361 if (TARGET_NEON)
29362 if (arm_evpc_neon_vext (d))
29363 return true;
29364
29365 /* The pattern matching functions above are written to look for a small
29366 number to begin the sequence (0, 1, N/2). If we begin with an index
29367 from the second operand, we can swap the operands. */
29368 unsigned int nelt = d->perm.length ();
29369 if (d->perm[0] >= nelt)
29370 {
29371 d->perm.rotate_inputs (1);
29372 std::swap (d->op0, d->op1);
29373 }
29374
29375 if (TARGET_NEON)
29376 {
29377 if (arm_evpc_neon_vuzp (d))
29378 return true;
29379 if (arm_evpc_neon_vzip (d))
29380 return true;
29381 if (arm_evpc_neon_vrev (d))
29382 return true;
29383 if (arm_evpc_neon_vtrn (d))
29384 return true;
29385 return arm_evpc_neon_vtbl (d);
29386 }
29387 return false;
29388 }
29389
29390 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29391
29392 static bool
29393 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29394 const vec_perm_indices &sel)
29395 {
29396 struct expand_vec_perm_d d;
29397 int i, nelt, which;
29398
29399 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29400 return false;
29401
29402 d.target = target;
29403 d.op0 = op0;
29404 d.op1 = op1;
29405
29406 d.vmode = vmode;
29407 gcc_assert (VECTOR_MODE_P (d.vmode));
29408 d.testing_p = !target;
29409
29410 nelt = GET_MODE_NUNITS (d.vmode);
29411 for (i = which = 0; i < nelt; ++i)
29412 {
29413 int ei = sel[i] & (2 * nelt - 1);
29414 which |= (ei < nelt ? 1 : 2);
29415 }
29416
29417 switch (which)
29418 {
29419 default:
29420 gcc_unreachable();
29421
29422 case 3:
29423 d.one_vector_p = false;
29424 if (d.testing_p || !rtx_equal_p (op0, op1))
29425 break;
29426
29427 /* The elements of PERM do not suggest that only the first operand
29428 is used, but both operands are identical. Allow easier matching
29429 of the permutation by folding the permutation into the single
29430 input vector. */
29431 /* FALLTHRU */
29432 case 2:
29433 d.op0 = op1;
29434 d.one_vector_p = true;
29435 break;
29436
29437 case 1:
29438 d.op1 = op0;
29439 d.one_vector_p = true;
29440 break;
29441 }
29442
29443 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29444
29445 if (!d.testing_p)
29446 return arm_expand_vec_perm_const_1 (&d);
29447
29448 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29449 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29450 if (!d.one_vector_p)
29451 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29452
29453 start_sequence ();
29454 bool ret = arm_expand_vec_perm_const_1 (&d);
29455 end_sequence ();
29456
29457 return ret;
29458 }
29459
29460 bool
29461 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29462 {
29463   /* For soft-float targets, all auto-increment forms are OK if we
29464      have LDRD or the mode fits in a single word.  */
29465 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29466 return true;
29467
29468 switch (code)
29469 {
29470     /* Post-increment and pre-decrement are supported for all
29471        instruction forms except for vector forms.  */
29472 case ARM_POST_INC:
29473 case ARM_PRE_DEC:
29474 if (VECTOR_MODE_P (mode))
29475 {
29476 if (code != ARM_PRE_DEC)
29477 return true;
29478 else
29479 return false;
29480 }
29481
29482 return true;
29483
29484 case ARM_POST_DEC:
29485 case ARM_PRE_INC:
29486       /* Without LDRD, and with a mode size greater than the
29487          word size, there is no point in auto-incrementing
29488          because ldm and stm do not have these forms.  */
29489 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29490 return false;
29491
29492 /* Vector and floating point modes do not support
29493 these auto increment forms. */
29494 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29495 return false;
29496
29497 return true;
29498
29499 default:
29500 return false;
29501
29502 }
29503
29504 return false;
29505 }
29506
29507 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29508    on ARM, since we know that shifts by negative amounts are no-ops.
29509 Additionally, the default expansion code is not available or suitable
29510 for post-reload insn splits (this can occur when the register allocator
29511 chooses not to do a shift in NEON).
29512
29513 This function is used in both initial expand and post-reload splits, and
29514 handles all kinds of 64-bit shifts.
29515
29516 Input requirements:
29517 - It is safe for the input and output to be the same register, but
29518 early-clobber rules apply for the shift amount and scratch registers.
29519 - Shift by register requires both scratch registers. In all other cases
29520 the scratch registers may be NULL.
29521 - Ashiftrt by a register also clobbers the CC register. */
29522 void
29523 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29524 rtx amount, rtx scratch1, rtx scratch2)
29525 {
29526 rtx out_high = gen_highpart (SImode, out);
29527 rtx out_low = gen_lowpart (SImode, out);
29528 rtx in_high = gen_highpart (SImode, in);
29529 rtx in_low = gen_lowpart (SImode, in);
29530
29531 /* Terminology:
29532 in = the register pair containing the input value.
29533 out = the destination register pair.
29534 up = the high- or low-part of each pair.
29535 down = the opposite part to "up".
29536 In a shift, we can consider bits to shift from "up"-stream to
29537 "down"-stream, so in a left-shift "up" is the low-part and "down"
29538 is the high-part of each register pair. */
29539
29540 rtx out_up = code == ASHIFT ? out_low : out_high;
29541 rtx out_down = code == ASHIFT ? out_high : out_low;
29542 rtx in_up = code == ASHIFT ? in_low : in_high;
29543 rtx in_down = code == ASHIFT ? in_high : in_low;
29544
29545 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29546 gcc_assert (out
29547 && (REG_P (out) || GET_CODE (out) == SUBREG)
29548 && GET_MODE (out) == DImode);
29549 gcc_assert (in
29550 && (REG_P (in) || GET_CODE (in) == SUBREG)
29551 && GET_MODE (in) == DImode);
29552 gcc_assert (amount
29553 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29554 && GET_MODE (amount) == SImode)
29555 || CONST_INT_P (amount)));
29556 gcc_assert (scratch1 == NULL
29557 || (GET_CODE (scratch1) == SCRATCH)
29558 || (GET_MODE (scratch1) == SImode
29559 && REG_P (scratch1)));
29560 gcc_assert (scratch2 == NULL
29561 || (GET_CODE (scratch2) == SCRATCH)
29562 || (GET_MODE (scratch2) == SImode
29563 && REG_P (scratch2)));
29564 gcc_assert (!REG_P (out) || !REG_P (amount)
29565 || !HARD_REGISTER_P (out)
29566 || (REGNO (out) != REGNO (amount)
29567 && REGNO (out) + 1 != REGNO (amount)));
29568
29569 /* Macros to make following code more readable. */
29570 #define SUB_32(DEST,SRC) \
29571 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29572 #define RSB_32(DEST,SRC) \
29573 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29574 #define SUB_S_32(DEST,SRC) \
29575 gen_addsi3_compare0 ((DEST), (SRC), \
29576 GEN_INT (-32))
29577 #define SET(DEST,SRC) \
29578 gen_rtx_SET ((DEST), (SRC))
29579 #define SHIFT(CODE,SRC,AMOUNT) \
29580 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29581 #define LSHIFT(CODE,SRC,AMOUNT) \
29582 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29583 SImode, (SRC), (AMOUNT))
29584 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29585 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29586 SImode, (SRC), (AMOUNT))
29587 #define ORR(A,B) \
29588 gen_rtx_IOR (SImode, (A), (B))
29589 #define BRANCH(COND,LABEL) \
29590 gen_arm_cond_branch ((LABEL), \
29591 gen_rtx_ ## COND (CCmode, cc_reg, \
29592 const0_rtx), \
29593 cc_reg)
29594
29595 /* Shifts by register and shifts by constant are handled separately. */
29596 if (CONST_INT_P (amount))
29597 {
29598 /* We have a shift-by-constant. */
29599
29600 /* First, handle out-of-range shift amounts.
29601 In both cases we try to match the result an ARM instruction in a
29602 shift-by-register would give. This helps reduce execution
29603 differences between optimization levels, but it won't stop other
29604 	 parts of the compiler doing different things.  This is "undefined
29605 	 behavior", in any case.  */
29606 if (INTVAL (amount) <= 0)
29607 emit_insn (gen_movdi (out, in));
29608 else if (INTVAL (amount) >= 64)
29609 {
29610 if (code == ASHIFTRT)
29611 {
29612 rtx const31_rtx = GEN_INT (31);
29613 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29614 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29615 }
29616 else
29617 emit_insn (gen_movdi (out, const0_rtx));
29618 }
29619
29620 /* Now handle valid shifts. */
29621 else if (INTVAL (amount) < 32)
29622 {
29623 /* Shifts by a constant less than 32. */
29624 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29625
29626 /* Clearing the out register in DImode first avoids lots
29627 of spilling and results in less stack usage.
29628 Later this redundant insn is completely removed.
29629 Do that only if "in" and "out" are different registers. */
29630 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29631 emit_insn (SET (out, const0_rtx));
29632 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29633 emit_insn (SET (out_down,
29634 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29635 out_down)));
29636 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29637 }
29638 else
29639 {
29640 /* Shifts by a constant greater than 31. */
29641 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29642
29643 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29644 emit_insn (SET (out, const0_rtx));
29645 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29646 if (code == ASHIFTRT)
29647 emit_insn (gen_ashrsi3 (out_up, in_up,
29648 GEN_INT (31)));
29649 else
29650 emit_insn (SET (out_up, const0_rtx));
29651 }
29652 }
29653 else
29654 {
29655 /* We have a shift-by-register. */
29656 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29657
29658 /* This alternative requires the scratch registers. */
29659 gcc_assert (scratch1 && REG_P (scratch1));
29660 gcc_assert (scratch2 && REG_P (scratch2));
29661
29662 /* We will need the values "amount-32" and "32-amount" later.
29663 Swapping them around now allows the later code to be more general. */
29664 switch (code)
29665 {
29666 case ASHIFT:
29667 emit_insn (SUB_32 (scratch1, amount));
29668 emit_insn (RSB_32 (scratch2, amount));
29669 break;
29670 case ASHIFTRT:
29671 emit_insn (RSB_32 (scratch1, amount));
29672 /* Also set CC = amount > 32. */
29673 emit_insn (SUB_S_32 (scratch2, amount));
29674 break;
29675 case LSHIFTRT:
29676 emit_insn (RSB_32 (scratch1, amount));
29677 emit_insn (SUB_32 (scratch2, amount));
29678 break;
29679 default:
29680 gcc_unreachable ();
29681 }
29682
29683 /* Emit code like this:
29684
29685 arithmetic-left:
29686 out_down = in_down << amount;
29687 out_down = (in_up << (amount - 32)) | out_down;
29688 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29689 out_up = in_up << amount;
29690
29691 arithmetic-right:
29692 out_down = in_down >> amount;
29693 out_down = (in_up << (32 - amount)) | out_down;
29694 if (amount < 32)
29695 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29696 	    out_up = in_up >> amount;
29697
29698 logical-right:
29699 out_down = in_down >> amount;
29700 out_down = (in_up << (32 - amount)) | out_down;
29701 if (amount < 32)
29702 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29703 	    out_up = (unsigned)in_up >> amount;
29704
29705 The ARM and Thumb2 variants are the same but implemented slightly
29706 differently. If this were only called during expand we could just
29707 use the Thumb2 case and let combine do the right thing, but this
29708 can also be called from post-reload splitters. */
29709
29710 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29711
29712 if (!TARGET_THUMB2)
29713 {
29714 /* Emit code for ARM mode. */
29715 emit_insn (SET (out_down,
29716 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29717 if (code == ASHIFTRT)
29718 {
29719 rtx_code_label *done_label = gen_label_rtx ();
29720 emit_jump_insn (BRANCH (LT, done_label));
29721 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29722 out_down)));
29723 emit_label (done_label);
29724 }
29725 else
29726 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29727 out_down)));
29728 }
29729 else
29730 {
29731 /* Emit code for Thumb2 mode.
29732 Thumb2 can't do shift and or in one insn. */
29733 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29734 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29735
29736 if (code == ASHIFTRT)
29737 {
29738 rtx_code_label *done_label = gen_label_rtx ();
29739 emit_jump_insn (BRANCH (LT, done_label));
29740 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29741 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29742 emit_label (done_label);
29743 }
29744 else
29745 {
29746 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29747 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29748 }
29749 }
29750
29751 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29752 }
29753
29754 #undef SUB_32
29755 #undef RSB_32
29756 #undef SUB_S_32
29757 #undef SET
29758 #undef SHIFT
29759 #undef LSHIFT
29760 #undef REV_LSHIFT
29761 #undef ORR
29762 #undef BRANCH
29763 }
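
/* Illustrative only: a plain C model, not part of the backend, of the
   decomposition above for a 64-bit logical right shift built from 32-bit
   halves.  C shifts by 32 or more are undefined, so the cases are guarded
   explicitly here; the RTL sequence instead relies on the behaviour of
   ARM register-specified shifts.  */

#include <stdint.h>

static uint64_t
example_lshr64 (uint32_t in_low, uint32_t in_high, unsigned int amount)
{
  uint32_t out_low, out_high;

  if (amount == 0)
    {
      out_low = in_low;
      out_high = in_high;
    }
  else if (amount < 32)
    {
      out_low = (in_low >> amount) | (in_high << (32 - amount));
      out_high = in_high >> amount;
    }
  else                          /* amount in [32, 63].  */
    {
      out_low = in_high >> (amount - 32);
      out_high = 0;
    }
  return ((uint64_t) out_high << 32) | out_low;
}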
29764
29765 /* Returns true if the pattern is a valid symbolic address, which is either a
29766 symbol_ref or (symbol_ref + addend).
29767
29768 According to the ARM ELF ABI, the initial addend of REL-type relocations
29769 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29770 literal field of the instruction as a 16-bit signed value in the range
29771 -32768 <= A < 32768. */
29772
29773 bool
29774 arm_valid_symbolic_address_p (rtx addr)
29775 {
29776 rtx xop0, xop1 = NULL_RTX;
29777 rtx tmp = addr;
29778
29779 if (target_word_relocations)
29780 return false;
29781
29782 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29783 return true;
29784
29785 /* (const (plus: symbol_ref const_int)) */
29786 if (GET_CODE (addr) == CONST)
29787 tmp = XEXP (addr, 0);
29788
29789 if (GET_CODE (tmp) == PLUS)
29790 {
29791 xop0 = XEXP (tmp, 0);
29792 xop1 = XEXP (tmp, 1);
29793
29794 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29795 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29796 }
29797
29798 return false;
29799 }
29800
29801 /* Returns true if this is a valid comparison operation, and puts
29802    the operands into a form that is valid for it.  */
29803 bool
29804 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29805 {
29806 enum rtx_code code = GET_CODE (*comparison);
29807 int code_int;
29808 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29809 ? GET_MODE (*op2) : GET_MODE (*op1);
29810
29811 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29812
29813 if (code == UNEQ || code == LTGT)
29814 return false;
29815
29816 code_int = (int)code;
29817 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29818 PUT_CODE (*comparison, (enum rtx_code)code_int);
29819
29820 switch (mode)
29821 {
29822 case E_SImode:
29823 if (!arm_add_operand (*op1, mode))
29824 *op1 = force_reg (mode, *op1);
29825 if (!arm_add_operand (*op2, mode))
29826 *op2 = force_reg (mode, *op2);
29827 return true;
29828
29829 case E_DImode:
29830 if (!cmpdi_operand (*op1, mode))
29831 *op1 = force_reg (mode, *op1);
29832 if (!cmpdi_operand (*op2, mode))
29833 *op2 = force_reg (mode, *op2);
29834 return true;
29835
29836 case E_HFmode:
29837 if (!TARGET_VFP_FP16INST)
29838 break;
29839 /* FP16 comparisons are done in SF mode. */
29840 mode = SFmode;
29841 *op1 = convert_to_mode (mode, *op1, 1);
29842 *op2 = convert_to_mode (mode, *op2, 1);
29843 /* Fall through. */
29844 case E_SFmode:
29845 case E_DFmode:
29846 if (!vfp_compare_operand (*op1, mode))
29847 *op1 = force_reg (mode, *op1);
29848 if (!vfp_compare_operand (*op2, mode))
29849 *op2 = force_reg (mode, *op2);
29850 return true;
29851 default:
29852 break;
29853 }
29854
29855 return false;
29856
29857 }
29858
29859 /* Maximum number of instructions to use when setting a block of memory.  */
29860 static int
29861 arm_block_set_max_insns (void)
29862 {
29863 if (optimize_function_for_size_p (cfun))
29864 return 4;
29865 else
29866 return current_tune->max_insns_inline_memset;
29867 }
29868
29869 /* Return TRUE if it's profitable to set a block of memory for the
29870    non-vectorized case.  VAL is the value to set the memory
29871 with. LENGTH is the number of bytes to set. ALIGN is the
29872 alignment of the destination memory in bytes. UNALIGNED_P
29873 is TRUE if we can only set the memory with instructions
29874 meeting alignment requirements. USE_STRD_P is TRUE if we
29875 can use strd to set the memory. */
29876 static bool
29877 arm_block_set_non_vect_profit_p (rtx val,
29878 unsigned HOST_WIDE_INT length,
29879 unsigned HOST_WIDE_INT align,
29880 bool unaligned_p, bool use_strd_p)
29881 {
29882 int num = 0;
29883   /* For a leftover of 0-7 bytes, we can set the memory block using
29884      strb/strh/str with the minimum number of instructions.  */
29885 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29886
29887 if (unaligned_p)
29888 {
29889 num = arm_const_inline_cost (SET, val);
29890 num += length / align + length % align;
29891 }
29892 else if (use_strd_p)
29893 {
29894 num = arm_const_double_inline_cost (val);
29895 num += (length >> 3) + leftover[length & 7];
29896 }
29897 else
29898 {
29899 num = arm_const_inline_cost (SET, val);
29900 num += (length >> 2) + leftover[length & 3];
29901 }
29902
29903 /* We may be able to combine last pair STRH/STRB into a single STR
29904 by shifting one byte back. */
29905 if (unaligned_access && length > 3 && (length & 3) == 3)
29906 num--;
29907
29908 return (num <= arm_block_set_max_insns ());
29909 }
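
/* Illustrative only: a minimal sketch, not backend code, of the store
   count used above for the aligned word-store (non-strd) case: one word
   store per 4 bytes plus the strb/strh/str combination covering the 0-3
   leftover bytes, before the constant-loading cost is added.  */

static int
example_word_store_count (unsigned int length)
{
  static const int leftover[4] = {0, 1, 1, 2};
  return (length >> 2) + leftover[length & 3];
}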
29910
29911 /* Return TRUE if it's profitable to set a block of memory for the
29912    vectorized case.  LENGTH is the number of bytes to set.
29913 ALIGN is the alignment of destination memory in bytes.
29914 MODE is the vector mode used to set the memory. */
29915 static bool
29916 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29917 unsigned HOST_WIDE_INT align,
29918 machine_mode mode)
29919 {
29920 int num;
29921 bool unaligned_p = ((align & 3) != 0);
29922 unsigned int nelt = GET_MODE_NUNITS (mode);
29923
29924 /* Instruction loading constant value. */
29925 num = 1;
29926 /* Instructions storing the memory. */
29927 num += (length + nelt - 1) / nelt;
29928   /* Instructions adjusting the address expression.  We only need to
29929      adjust the address expression if it's 4-byte aligned and the leftover
29930      bytes can only be stored by a misaligned store instruction.  */
29931 if (!unaligned_p && (length & 3) != 0)
29932 num++;
29933
29934 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29935 if (!unaligned_p && mode == V16QImode)
29936 num--;
29937
29938 return (num <= arm_block_set_max_insns ());
29939 }
29940
29941 /* Set a block of memory using vectorization instructions for the
29942 unaligned case. We fill the first LENGTH bytes of the memory
29943 area starting from DSTBASE with byte constant VALUE. ALIGN is
29944 the alignment requirement of memory. Return TRUE if succeeded. */
29945 static bool
29946 arm_block_set_unaligned_vect (rtx dstbase,
29947 unsigned HOST_WIDE_INT length,
29948 unsigned HOST_WIDE_INT value,
29949 unsigned HOST_WIDE_INT align)
29950 {
29951 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29952 rtx dst, mem;
29953 rtx val_vec, reg;
29954 rtx (*gen_func) (rtx, rtx);
29955 machine_mode mode;
29956 unsigned HOST_WIDE_INT v = value;
29957 unsigned int offset = 0;
29958 gcc_assert ((align & 0x3) != 0);
29959 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29960 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29961 if (length >= nelt_v16)
29962 {
29963 mode = V16QImode;
29964 gen_func = gen_movmisalignv16qi;
29965 }
29966 else
29967 {
29968 mode = V8QImode;
29969 gen_func = gen_movmisalignv8qi;
29970 }
29971 nelt_mode = GET_MODE_NUNITS (mode);
29972 gcc_assert (length >= nelt_mode);
29973 /* Skip if it isn't profitable. */
29974 if (!arm_block_set_vect_profit_p (length, align, mode))
29975 return false;
29976
29977 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29978 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29979
29980 v = sext_hwi (v, BITS_PER_WORD);
29981
29982 reg = gen_reg_rtx (mode);
29983 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29984 /* Emit instruction loading the constant value. */
29985 emit_move_insn (reg, val_vec);
29986
29987 /* Handle nelt_mode bytes in a vector. */
29988 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29989 {
29990 emit_insn ((*gen_func) (mem, reg));
29991 if (i + 2 * nelt_mode <= length)
29992 {
29993 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29994 offset += nelt_mode;
29995 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29996 }
29997 }
29998
29999   /* If at least nelt_v8 bytes are left over, we must be in
30000      V16QImode.  */
30001 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30002
30003 /* Handle (8, 16) bytes leftover. */
30004 if (i + nelt_v8 < length)
30005 {
30006 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30007 offset += length - i;
30008 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30009
30010 /* We are shifting bytes back, set the alignment accordingly. */
30011 if ((length & 1) != 0 && align >= 2)
30012 set_mem_align (mem, BITS_PER_UNIT);
30013
30014 emit_insn (gen_movmisalignv16qi (mem, reg));
30015 }
30016 /* Handle (0, 8] bytes leftover. */
30017 else if (i < length && i + nelt_v8 >= length)
30018 {
30019 if (mode == V16QImode)
30020 reg = gen_lowpart (V8QImode, reg);
30021
30022 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30023 + (nelt_mode - nelt_v8))));
30024 offset += (length - i) + (nelt_mode - nelt_v8);
30025 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30026
30027 /* We are shifting bytes back, set the alignment accordingly. */
30028 if ((length & 1) != 0 && align >= 2)
30029 set_mem_align (mem, BITS_PER_UNIT);
30030
30031 emit_insn (gen_movmisalignv8qi (mem, reg));
30032 }
30033
30034 return true;
30035 }
30036
30037 /* Set a block of memory using vectorization instructions for the
30038 aligned case. We fill the first LENGTH bytes of the memory area
30039 starting from DSTBASE with byte constant VALUE. ALIGN is the
30040 alignment requirement of memory. Return TRUE if succeeded. */
30041 static bool
30042 arm_block_set_aligned_vect (rtx dstbase,
30043 unsigned HOST_WIDE_INT length,
30044 unsigned HOST_WIDE_INT value,
30045 unsigned HOST_WIDE_INT align)
30046 {
30047 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30048 rtx dst, addr, mem;
30049 rtx val_vec, reg;
30050 machine_mode mode;
30051 unsigned HOST_WIDE_INT v = value;
30052 unsigned int offset = 0;
30053
30054 gcc_assert ((align & 0x3) == 0);
30055 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30056 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30057 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30058 mode = V16QImode;
30059 else
30060 mode = V8QImode;
30061
30062 nelt_mode = GET_MODE_NUNITS (mode);
30063 gcc_assert (length >= nelt_mode);
30064 /* Skip if it isn't profitable. */
30065 if (!arm_block_set_vect_profit_p (length, align, mode))
30066 return false;
30067
30068 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30069
30070 v = sext_hwi (v, BITS_PER_WORD);
30071
30072 reg = gen_reg_rtx (mode);
30073 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30074 /* Emit instruction loading the constant value. */
30075 emit_move_insn (reg, val_vec);
30076
30077 i = 0;
30078 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30079 if (mode == V16QImode)
30080 {
30081 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30082 emit_insn (gen_movmisalignv16qi (mem, reg));
30083 i += nelt_mode;
30084 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30085 if (i + nelt_v8 < length && i + nelt_v16 > length)
30086 {
30087 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30088 offset += length - nelt_mode;
30089 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30090 /* We are shifting bytes back, set the alignment accordingly. */
30091 if ((length & 0x3) == 0)
30092 set_mem_align (mem, BITS_PER_UNIT * 4);
30093 else if ((length & 0x1) == 0)
30094 set_mem_align (mem, BITS_PER_UNIT * 2);
30095 else
30096 set_mem_align (mem, BITS_PER_UNIT);
30097
30098 emit_insn (gen_movmisalignv16qi (mem, reg));
30099 return true;
30100 }
30101 /* Fall through for bytes leftover. */
30102 mode = V8QImode;
30103 nelt_mode = GET_MODE_NUNITS (mode);
30104 reg = gen_lowpart (V8QImode, reg);
30105 }
30106
30107 /* Handle 8 bytes in a vector. */
30108 for (; (i + nelt_mode <= length); i += nelt_mode)
30109 {
30110 addr = plus_constant (Pmode, dst, i);
30111 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30112 emit_move_insn (mem, reg);
30113 }
30114
30115 /* Handle single word leftover by shifting 4 bytes back. We can
30116 use aligned access for this case. */
30117 if (i + UNITS_PER_WORD == length)
30118 {
30119 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30120 offset += i - UNITS_PER_WORD;
30121 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30122 /* We are shifting 4 bytes back, set the alignment accordingly. */
30123 if (align > UNITS_PER_WORD)
30124 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30125
30126 emit_move_insn (mem, reg);
30127 }
30128 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30129 We have to use unaligned access for this case. */
30130 else if (i < length)
30131 {
30132 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30133 offset += length - nelt_mode;
30134 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30135 /* We are shifting bytes back, set the alignment accordingly. */
30136 if ((length & 1) == 0)
30137 set_mem_align (mem, BITS_PER_UNIT * 2);
30138 else
30139 set_mem_align (mem, BITS_PER_UNIT);
30140
30141 emit_insn (gen_movmisalignv8qi (mem, reg));
30142 }
30143
30144 return true;
30145 }
30146
30147 /* Set a block of memory using plain strh/strb instructions, only
30148    using instructions allowed by ALIGN on the processor.  We fill the
30149 first LENGTH bytes of the memory area starting from DSTBASE
30150 with byte constant VALUE. ALIGN is the alignment requirement
30151 of memory. */
30152 static bool
30153 arm_block_set_unaligned_non_vect (rtx dstbase,
30154 unsigned HOST_WIDE_INT length,
30155 unsigned HOST_WIDE_INT value,
30156 unsigned HOST_WIDE_INT align)
30157 {
30158 unsigned int i;
30159 rtx dst, addr, mem;
30160 rtx val_exp, val_reg, reg;
30161 machine_mode mode;
30162 HOST_WIDE_INT v = value;
30163
30164 gcc_assert (align == 1 || align == 2);
30165
30166 if (align == 2)
30167 v |= (value << BITS_PER_UNIT);
30168
30169 v = sext_hwi (v, BITS_PER_WORD);
30170 val_exp = GEN_INT (v);
30171 /* Skip if it isn't profitable. */
30172 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30173 align, true, false))
30174 return false;
30175
30176 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30177 mode = (align == 2 ? HImode : QImode);
30178 val_reg = force_reg (SImode, val_exp);
30179 reg = gen_lowpart (mode, val_reg);
30180
30181 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30182 {
30183 addr = plus_constant (Pmode, dst, i);
30184 mem = adjust_automodify_address (dstbase, mode, addr, i);
30185 emit_move_insn (mem, reg);
30186 }
30187
30188 /* Handle single byte leftover. */
30189 if (i + 1 == length)
30190 {
30191 reg = gen_lowpart (QImode, val_reg);
30192 addr = plus_constant (Pmode, dst, i);
30193 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30194 emit_move_insn (mem, reg);
30195 i++;
30196 }
30197
30198 gcc_assert (i == length);
30199 return true;
30200 }
30201
30202 /* Set a block of memory using plain strd/str/strh/strb instructions,
30203 to permit unaligned copies on processors which support unaligned
30204 semantics for those instructions. We fill the first LENGTH bytes
30205 of the memory area starting from DSTBASE with byte constant VALUE.
30206 ALIGN is the alignment requirement of memory. */
30207 static bool
30208 arm_block_set_aligned_non_vect (rtx dstbase,
30209 unsigned HOST_WIDE_INT length,
30210 unsigned HOST_WIDE_INT value,
30211 unsigned HOST_WIDE_INT align)
30212 {
30213 unsigned int i;
30214 rtx dst, addr, mem;
30215 rtx val_exp, val_reg, reg;
30216 unsigned HOST_WIDE_INT v;
30217 bool use_strd_p;
30218
30219 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30220 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30221
30222 v = (value | (value << 8) | (value << 16) | (value << 24));
30223 if (length < UNITS_PER_WORD)
30224 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30225
30226 if (use_strd_p)
30227 v |= (v << BITS_PER_WORD);
30228 else
30229 v = sext_hwi (v, BITS_PER_WORD);
30230
30231 val_exp = GEN_INT (v);
30232 /* Skip if it isn't profitable. */
30233 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30234 align, false, use_strd_p))
30235 {
30236 if (!use_strd_p)
30237 return false;
30238
30239 /* Try without strd. */
30240 v = (v >> BITS_PER_WORD);
30241 v = sext_hwi (v, BITS_PER_WORD);
30242 val_exp = GEN_INT (v);
30243 use_strd_p = false;
30244 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30245 align, false, use_strd_p))
30246 return false;
30247 }
30248
30249 i = 0;
30250 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30251 /* Handle double words using strd if possible. */
30252 if (use_strd_p)
30253 {
30254 val_reg = force_reg (DImode, val_exp);
30255 reg = val_reg;
30256 for (; (i + 8 <= length); i += 8)
30257 {
30258 addr = plus_constant (Pmode, dst, i);
30259 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30260 emit_move_insn (mem, reg);
30261 }
30262 }
30263 else
30264 val_reg = force_reg (SImode, val_exp);
30265
30266 /* Handle words. */
30267 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30268 for (; (i + 4 <= length); i += 4)
30269 {
30270 addr = plus_constant (Pmode, dst, i);
30271 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30272 if ((align & 3) == 0)
30273 emit_move_insn (mem, reg);
30274 else
30275 emit_insn (gen_unaligned_storesi (mem, reg));
30276 }
30277
30278 /* Merge last pair of STRH and STRB into a STR if possible. */
30279 if (unaligned_access && i > 0 && (i + 3) == length)
30280 {
30281 addr = plus_constant (Pmode, dst, i - 1);
30282 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30283 /* We are shifting one byte back, set the alignment accordingly. */
30284 if ((align & 1) == 0)
30285 set_mem_align (mem, BITS_PER_UNIT);
30286
30287 /* Most likely this is an unaligned access, and we can't tell at
30288 compilation time. */
30289 emit_insn (gen_unaligned_storesi (mem, reg));
30290 return true;
30291 }
30292
30293 /* Handle half word leftover. */
30294 if (i + 2 <= length)
30295 {
30296 reg = gen_lowpart (HImode, val_reg);
30297 addr = plus_constant (Pmode, dst, i);
30298 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30299 if ((align & 1) == 0)
30300 emit_move_insn (mem, reg);
30301 else
30302 emit_insn (gen_unaligned_storehi (mem, reg));
30303
30304 i += 2;
30305 }
30306
30307 /* Handle single byte leftover. */
30308 if (i + 1 == length)
30309 {
30310 reg = gen_lowpart (QImode, val_reg);
30311 addr = plus_constant (Pmode, dst, i);
30312 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30313 emit_move_insn (mem, reg);
30314 }
30315
30316 return true;
30317 }
30318
30319 /* Set a block of memory using vectorization instructions for both
30320 aligned and unaligned cases. We fill the first LENGTH bytes of
30321 the memory area starting from DSTBASE with byte constant VALUE.
30322 ALIGN is the alignment requirement of memory. */
30323 static bool
30324 arm_block_set_vect (rtx dstbase,
30325 unsigned HOST_WIDE_INT length,
30326 unsigned HOST_WIDE_INT value,
30327 unsigned HOST_WIDE_INT align)
30328 {
30329 /* Check whether we need to use unaligned store instruction. */
30330 if (((align & 3) != 0 || (length & 3) != 0)
30331 /* Check whether unaligned store instruction is available. */
30332 && (!unaligned_access || BYTES_BIG_ENDIAN))
30333 return false;
30334
30335 if ((align & 3) == 0)
30336 return arm_block_set_aligned_vect (dstbase, length, value, align);
30337 else
30338 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30339 }
30340
30341 /* Expand a string store operation.  First we try to do it using
30342    vectorization instructions, then fall back to ARM unaligned access and
30343    double-word stores if profitable.  OPERANDS[0] is the destination,
30344    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
30345    initialize the memory, and OPERANDS[3] is the known alignment of the
30346    destination.  */
30347 bool
30348 arm_gen_setmem (rtx *operands)
30349 {
30350 rtx dstbase = operands[0];
30351 unsigned HOST_WIDE_INT length;
30352 unsigned HOST_WIDE_INT value;
30353 unsigned HOST_WIDE_INT align;
30354
30355 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30356 return false;
30357
30358 length = UINTVAL (operands[1]);
30359 if (length > 64)
30360 return false;
30361
30362 value = (UINTVAL (operands[2]) & 0xFF);
30363 align = UINTVAL (operands[3]);
30364 if (TARGET_NEON && length >= 8
30365 && current_tune->string_ops_prefer_neon
30366 && arm_block_set_vect (dstbase, length, value, align))
30367 return true;
30368
30369 if (!unaligned_access && (align & 3) != 0)
30370 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30371
30372 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30373 }
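
/* Illustrative only: the kind of source-level code whose memset expansion
   reaches arm_gen_setmem, i.e. a constant value and a small constant
   length (at most 64 bytes) with known alignment.  The struct and
   function names are made up.  */

#include <string.h>

struct example_buf
{
  char bytes[24];
};

static void
example_clear (struct example_buf *b)
{
  /* Constant length 24, constant value 0: a candidate for the inline
     str/strh/strb, strd or Neon vst1 sequences chosen above.  */
  memset (b->bytes, 0, sizeof b->bytes);
}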
30374
30375
30376 static bool
30377 arm_macro_fusion_p (void)
30378 {
30379 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30380 }
30381
30382 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30383 for MOVW / MOVT macro fusion. */
30384
30385 static bool
30386 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30387 {
30388 /* We are trying to fuse
30389 movw imm / movt imm
30390 instructions as a group that gets scheduled together. */
30391
30392 rtx set_dest = SET_DEST (curr_set);
30393
30394 if (GET_MODE (set_dest) != SImode)
30395 return false;
30396
30397 /* We are trying to match:
30398 prev (movw) == (set (reg r0) (const_int imm16))
30399 curr (movt) == (set (zero_extract (reg r0)
30400 (const_int 16)
30401 (const_int 16))
30402 (const_int imm16_1))
30403 or
30404 prev (movw) == (set (reg r1)
30405 (high (symbol_ref ("SYM"))))
30406 curr (movt) == (set (reg r0)
30407 (lo_sum (reg r1)
30408 (symbol_ref ("SYM")))) */
30409
30410 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30411 {
30412 if (CONST_INT_P (SET_SRC (curr_set))
30413 && CONST_INT_P (SET_SRC (prev_set))
30414 && REG_P (XEXP (set_dest, 0))
30415 && REG_P (SET_DEST (prev_set))
30416 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30417 return true;
30418
30419 }
30420 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30421 && REG_P (SET_DEST (curr_set))
30422 && REG_P (SET_DEST (prev_set))
30423 && GET_CODE (SET_SRC (prev_set)) == HIGH
30424 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30425 return true;
30426
30427 return false;
30428 }
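/* Editor's note (illustrative, not part of the original sources): the two
   shapes above correspond to instruction pairs such as
       movw  r0, #:lower16:sym        movw  r0, #0x1234
       movt  r0, #:upper16:sym        movt  r0, #0x5678
   which build a 32-bit address or constant in two halves; tunings that set
   FUSE_MOVW_MOVT keep such pairs adjacent so the core can fuse them.  */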
30429
30430 static bool
30431 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30432 {
30433 rtx prev_set = single_set (prev);
30434 rtx curr_set = single_set (curr);
30435
30436 if (!prev_set
30437 || !curr_set)
30438 return false;
30439
30440 if (any_condjump_p (curr))
30441 return false;
30442
30443 if (!arm_macro_fusion_p ())
30444 return false;
30445
30446 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30447 && aarch_crypto_can_dual_issue (prev, curr))
30448 return true;
30449
30450 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30451 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30452 return true;
30453
30454 return false;
30455 }
30456
30457 /* Return true iff the instruction fusion described by OP is enabled. */
30458 bool
30459 arm_fusion_enabled_p (tune_params::fuse_ops op)
30460 {
30461 return current_tune->fusible_ops & op;
30462 }
30463
30464 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30465 scheduled for speculative execution. Reject the long-running division
30466 and square-root instructions. */
30467
30468 static bool
30469 arm_sched_can_speculate_insn (rtx_insn *insn)
30470 {
30471 switch (get_attr_type (insn))
30472 {
30473 case TYPE_SDIV:
30474 case TYPE_UDIV:
30475 case TYPE_FDIVS:
30476 case TYPE_FDIVD:
30477 case TYPE_FSQRTS:
30478 case TYPE_FSQRTD:
30479 case TYPE_NEON_FP_SQRT_S:
30480 case TYPE_NEON_FP_SQRT_D:
30481 case TYPE_NEON_FP_SQRT_S_Q:
30482 case TYPE_NEON_FP_SQRT_D_Q:
30483 case TYPE_NEON_FP_DIV_S:
30484 case TYPE_NEON_FP_DIV_D:
30485 case TYPE_NEON_FP_DIV_S_Q:
30486 case TYPE_NEON_FP_DIV_D_Q:
30487 return false;
30488 default:
30489 return true;
30490 }
30491 }
30492
30493 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30494
30495 static unsigned HOST_WIDE_INT
30496 arm_asan_shadow_offset (void)
30497 {
30498 return HOST_WIDE_INT_1U << 29;
30499 }
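/* Editor's note (illustrative, not part of the original sources): with this
   offset, AddressSanitizer computes the shadow address roughly as
       shadow = (addr >> 3) + 0x20000000
   i.e. the shadow region starts at 512 MiB; only the offset is configured
   here, the shift of 3 comes from the generic sanitizer code.  */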
30500
30501
30502 /* This is a temporary fix for PR60655.  Ideally we should
30503 handle most of these cases in the generic parts, but
30504 currently we reject (minus (..) (sym_ref)).  We try to
30505 ameliorate the case of (minus (sym_ref1) (sym_ref2))
30506 where both symbols are in the same section. */
30507
30508 static bool
30509 arm_const_not_ok_for_debug_p (rtx p)
30510 {
30511 tree decl_op0 = NULL;
30512 tree decl_op1 = NULL;
30513
30514 if (GET_CODE (p) == UNSPEC)
30515 return true;
30516 if (GET_CODE (p) == MINUS)
30517 {
30518 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30519 {
30520 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30521 if (decl_op1
30522 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30523 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30524 {
30525 if ((VAR_P (decl_op1)
30526 || TREE_CODE (decl_op1) == CONST_DECL)
30527 && (VAR_P (decl_op0)
30528 || TREE_CODE (decl_op0) == CONST_DECL))
30529 return (get_variable_section (decl_op1, false)
30530 != get_variable_section (decl_op0, false));
30531
30532 if (TREE_CODE (decl_op1) == LABEL_DECL
30533 && TREE_CODE (decl_op0) == LABEL_DECL)
30534 return (DECL_CONTEXT (decl_op1)
30535 != DECL_CONTEXT (decl_op0));
30536 }
30537
30538 return true;
30539 }
30540 }
30541
30542 return false;
30543 }
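/* Illustrative example (editor's note, not part of the original sources):
   for a difference of two symbols such as
       (minus (symbol_ref "a") (symbol_ref "b"))
   where both variables end up in the same data section, the function above
   returns false and the expression is kept for debug output; if the symbols
   (or labels) belong to different sections (or functions), it returns true
   and the expression is rejected.  */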
30544
30545 /* Return TRUE if X is a reference to a value in a constant pool. */
30546 extern bool
30547 arm_is_constant_pool_ref (rtx x)
30548 {
30549 return (MEM_P (x)
30550 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30551 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30552 }
30553
30554 /* Remember the last target of arm_set_current_function. */
30555 static GTY(()) tree arm_previous_fndecl;
30556
30557 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30558
30559 void
30560 save_restore_target_globals (tree new_tree)
30561 {
30562 /* If we have a previous state, use it. */
30563 if (TREE_TARGET_GLOBALS (new_tree))
30564 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30565 else if (new_tree == target_option_default_node)
30566 restore_target_globals (&default_target_globals);
30567 else
30568 {
30569 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30570 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30571 }
30572
30573 arm_option_params_internal ();
30574 }
30575
30576 /* Invalidate arm_previous_fndecl. */
30577
30578 void
30579 arm_reset_previous_fndecl (void)
30580 {
30581 arm_previous_fndecl = NULL_TREE;
30582 }
30583
30584 /* Establish appropriate back-end context for processing the function
30585 FNDECL. The argument might be NULL to indicate processing at top
30586 level, outside of any function scope. */
30587
30588 static void
30589 arm_set_current_function (tree fndecl)
30590 {
30591 if (!fndecl || fndecl == arm_previous_fndecl)
30592 return;
30593
30594 tree old_tree = (arm_previous_fndecl
30595 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30596 : NULL_TREE);
30597
30598 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30599
30600 /* If the current function has no attributes but the previous one did,
30601 use the default node. */
30602 if (! new_tree && old_tree)
30603 new_tree = target_option_default_node;
30604
30605 /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30606 the default has been handled by save_restore_target_globals from
30607 arm_pragma_target_parse. */
30608 if (old_tree == new_tree)
30609 return;
30610
30611 arm_previous_fndecl = fndecl;
30612
30613 /* First set the target options. */
30614 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30615
30616 save_restore_target_globals (new_tree);
30617 }
30618
30619 /* Implement TARGET_OPTION_PRINT. */
30620
30621 static void
30622 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30623 {
30624 int flags = ptr->x_target_flags;
30625 const char *fpu_name;
30626
30627 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30628 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30629
30630 fprintf (file, "%*sselected isa %s\n", indent, "",
30631 TARGET_THUMB2_P (flags) ? "thumb2" :
30632 TARGET_THUMB_P (flags) ? "thumb1" :
30633 "arm");
30634
30635 if (ptr->x_arm_arch_string)
30636 fprintf (file, "%*sselected architecture %s\n", indent, "",
30637 ptr->x_arm_arch_string);
30638
30639 if (ptr->x_arm_cpu_string)
30640 fprintf (file, "%*sselected CPU %s\n", indent, "",
30641 ptr->x_arm_cpu_string);
30642
30643 if (ptr->x_arm_tune_string)
30644 fprintf (file, "%*sselected tune %s\n", indent, "",
30645 ptr->x_arm_tune_string);
30646
30647 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30648 }
30649
30650 /* Hook to determine if one function can safely inline another. */
30651
30652 static bool
30653 arm_can_inline_p (tree caller, tree callee)
30654 {
30655 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30656 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30657 bool can_inline = true;
30658
30659 struct cl_target_option *caller_opts
30660 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30661 : target_option_default_node);
30662
30663 struct cl_target_option *callee_opts
30664 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30665 : target_option_default_node);
30666
30667 if (callee_opts == caller_opts)
30668 return true;
30669
30670 /* Callee's ISA features should be a subset of the caller's. */
30671 struct arm_build_target caller_target;
30672 struct arm_build_target callee_target;
30673 caller_target.isa = sbitmap_alloc (isa_num_bits);
30674 callee_target.isa = sbitmap_alloc (isa_num_bits);
30675
30676 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30677 false);
30678 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30679 false);
30680 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30681 can_inline = false;
30682
30683 sbitmap_free (caller_target.isa);
30684 sbitmap_free (callee_target.isa);
30685
30686 /* It is OK to inline between different modes.
30687 Functions with mode-specific instructions, e.g. using asm,
30688 must be explicitly protected with noinline. */
30689 return can_inline;
30690 }
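/* Editor's note (illustrative assumption, not part of the original sources):
   as an example of the ISA-subset rule above, a callee carrying
   __attribute__ ((target ("fpu=neon"))) would be inlinable into a caller
   built with -mfpu=neon-vfpv4 (whose feature set should include all of
   Neon's bits), but not into a caller built with -mfpu=vfpv3-d16, which
   lacks the Advanced SIMD feature bits.  */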
30691
30692 /* Hook to fix a function's alignment affected by a target attribute. */
30693
30694 static void
30695 arm_relayout_function (tree fndecl)
30696 {
30697 if (DECL_USER_ALIGN (fndecl))
30698 return;
30699
30700 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30701
30702 if (!callee_tree)
30703 callee_tree = target_option_default_node;
30704
30705 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30706 SET_DECL_ALIGN
30707 (fndecl,
30708 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30709 }
30710
30711 /* Inner function to process the attribute ((target ("..."))): take an argument
30712 and set the current options from it.  If the argument is a list, recursively
30713 process each entry. */
30714
30715 static bool
30716 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30717 {
30718 if (TREE_CODE (args) == TREE_LIST)
30719 {
30720 bool ret = true;
30721
30722 for (; args; args = TREE_CHAIN (args))
30723 if (TREE_VALUE (args)
30724 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30725 ret = false;
30726 return ret;
30727 }
30728
30729 else if (TREE_CODE (args) != STRING_CST)
30730 {
30731 error ("attribute %<target%> argument not a string");
30732 return false;
30733 }
30734
30735 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30736 char *q;
30737
30738 while ((q = strtok (argstr, ",")) != NULL)
30739 {
30740 while (ISSPACE (*q)) ++q;
30741
30742 argstr = NULL;
30743 if (!strncmp (q, "thumb", 5))
30744 opts->x_target_flags |= MASK_THUMB;
30745
30746 else if (!strncmp (q, "arm", 3))
30747 opts->x_target_flags &= ~MASK_THUMB;
30748
30749 else if (!strncmp (q, "fpu=", 4))
30750 {
30751 int fpu_index;
30752 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30753 &fpu_index, CL_TARGET))
30754 {
30755 error ("invalid fpu for target attribute or pragma %qs", q);
30756 return false;
30757 }
30758 if (fpu_index == TARGET_FPU_auto)
30759 {
30760 /* This doesn't really make sense until we support
30761 general dynamic selection of the architecture and all
30762 sub-features. */
30763 sorry ("auto fpu selection not currently permitted here");
30764 return false;
30765 }
30766 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30767 }
30768 else if (!strncmp (q, "arch=", 5))
30769 {
30770 char* arch = q+5;
30771 const arch_option *arm_selected_arch
30772 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30773
30774 if (!arm_selected_arch)
30775 {
30776 error ("invalid architecture for target attribute or pragma %qs",
30777 q);
30778 return false;
30779 }
30780
30781 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30782 }
30783 else if (q[0] == '+')
30784 {
30785 opts->x_arm_arch_string
30786 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30787 }
30788 else
30789 {
30790 error ("unknown target attribute or pragma %qs", q);
30791 return false;
30792 }
30793 }
30794
30795 return true;
30796 }
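/* Editor's note (illustrative, not part of the original sources): the parser
   above accepts comma-separated items such as
       __attribute__ ((target ("thumb")))
       __attribute__ ((target ("arm,fpu=vfpv3-d16")))
       __attribute__ ((target ("arch=armv7-a,thumb")))
   and "+<ext>" items, which are simply appended to the current architecture
   string (for instance "+crc" on an architecture that supports it; editor's
   example).  "fpu=auto" is rejected with a sorry () because dynamic FPU
   selection is not supported in attributes or pragmas.  */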
30797
30798 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30799
30800 tree
30801 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30802 struct gcc_options *opts_set)
30803 {
30804 struct cl_target_option cl_opts;
30805
30806 if (!arm_valid_target_attribute_rec (args, opts))
30807 return NULL_TREE;
30808
30809 cl_target_option_save (&cl_opts, opts);
30810 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30811 arm_option_check_internal (opts);
30812 /* Do any overrides, such as global options arch=xxx.
30813 We do this since arm_active_target was overridden. */
30814 arm_option_reconfigure_globals ();
30815 arm_options_perform_arch_sanity_checks ();
30816 arm_option_override_internal (opts, opts_set);
30817
30818 return build_target_option_node (opts);
30819 }
30820
30821 static void
30822 add_attribute (const char * mode, tree *attributes)
30823 {
30824 size_t len = strlen (mode);
30825 tree value = build_string (len, mode);
30826
30827 TREE_TYPE (value) = build_array_type (char_type_node,
30828 build_index_type (size_int (len)));
30829
30830 *attributes = tree_cons (get_identifier ("target"),
30831 build_tree_list (NULL_TREE, value),
30832 *attributes);
30833 }
30834
30835 /* For testing.  Insert thumb or arm modes alternately on functions. */
30836
30837 static void
30838 arm_insert_attributes (tree fndecl, tree * attributes)
30839 {
30840 const char *mode;
30841
30842 if (! TARGET_FLIP_THUMB)
30843 return;
30844
30845 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30846 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30847 return;
30848
30849 /* Nested definitions must inherit mode. */
30850 if (current_function_decl)
30851 {
30852 mode = TARGET_THUMB ? "thumb" : "arm";
30853 add_attribute (mode, attributes);
30854 return;
30855 }
30856
30857 /* If there is already a setting don't change it. */
30858 if (lookup_attribute ("target", *attributes) != NULL)
30859 return;
30860
30861 mode = thumb_flipper ? "thumb" : "arm";
30862 add_attribute (mode, attributes);
30863
30864 thumb_flipper = !thumb_flipper;
30865 }
30866
30867 /* Hook to validate attribute((target("string"))). */
30868
30869 static bool
30870 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30871 tree args, int ARG_UNUSED (flags))
30872 {
30873 bool ret = true;
30874 struct gcc_options func_options;
30875 tree cur_tree, new_optimize;
30876 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30877
30878 /* Get the optimization options of the current function. */
30879 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30880
30881 /* If the function changed the optimization levels as well as setting target
30882 options, start with the optimizations specified. */
30883 if (!func_optimize)
30884 func_optimize = optimization_default_node;
30885
30886 /* Init func_options. */
30887 memset (&func_options, 0, sizeof (func_options));
30888 init_options_struct (&func_options, NULL);
30889 lang_hooks.init_options_struct (&func_options);
30890
30891 /* Initialize func_options to the defaults. */
30892 cl_optimization_restore (&func_options,
30893 TREE_OPTIMIZATION (func_optimize));
30894
30895 cl_target_option_restore (&func_options,
30896 TREE_TARGET_OPTION (target_option_default_node));
30897
30898 /* Set func_options flags with new target mode. */
30899 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30900 &global_options_set);
30901
30902 if (cur_tree == NULL_TREE)
30903 ret = false;
30904
30905 new_optimize = build_optimization_node (&func_options);
30906
30907 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30908
30909 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30910
30911 finalize_options_struct (&func_options);
30912
30913 return ret;
30914 }
30915
30916 /* Match an ISA feature bitmap to a named FPU. We always use the
30917 first entry that exactly matches the feature set, so that we
30918 effectively canonicalize the FPU name for the assembler. */
30919 static const char*
30920 arm_identify_fpu_from_isa (sbitmap isa)
30921 {
30922 auto_sbitmap fpubits (isa_num_bits);
30923 auto_sbitmap cand_fpubits (isa_num_bits);
30924
30925 bitmap_and (fpubits, isa, isa_all_fpubits);
30926
30927 /* If there are no ISA feature bits relating to the FPU, we must be
30928 doing soft-float. */
30929 if (bitmap_empty_p (fpubits))
30930 return "softvfp";
30931
30932 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30933 {
30934 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30935 if (bitmap_equal_p (fpubits, cand_fpubits))
30936 return all_fpus[i].name;
30937 }
30938 /* We must find an entry, or things have gone wrong. */
30939 gcc_unreachable ();
30940 }
30941
30942 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30943 by the function fndecl. */
30944 void
30945 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30946 {
30947 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30948
30949 struct cl_target_option *targ_options;
30950 if (target_parts)
30951 targ_options = TREE_TARGET_OPTION (target_parts);
30952 else
30953 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30954 gcc_assert (targ_options);
30955
30956 /* Only update the assembler .arch string if it is distinct from the last
30957 such string we printed.  arch_to_print is set conditionally in case
30958 targ_options->x_arm_arch_string is NULL, which can happen
30959 when cc1 is invoked directly without passing the -march option. */
30960 std::string arch_to_print;
30961 if (targ_options->x_arm_arch_string)
30962 arch_to_print = targ_options->x_arm_arch_string;
30963
30964 if (arch_to_print != arm_last_printed_arch_string)
30965 {
30966 std::string arch_name
30967 = arch_to_print.substr (0, arch_to_print.find ("+"));
30968 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30969 const arch_option *arch
30970 = arm_parse_arch_option_name (all_architectures, "-march",
30971 targ_options->x_arm_arch_string);
30972 auto_sbitmap opt_bits (isa_num_bits);
30973
30974 gcc_assert (arch);
30975 if (arch->common.extensions)
30976 {
30977 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30978 opt->name != NULL;
30979 opt++)
30980 {
30981 if (!opt->remove)
30982 {
30983 arm_initialize_isa (opt_bits, opt->isa_bits);
30984 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
30985 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
30986 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
30987 opt->name);
30988 }
30989 }
30990 }
30991
30992 arm_last_printed_arch_string = arch_to_print;
30993 }
30994
30995 fprintf (stream, "\t.syntax unified\n");
30996
30997 if (TARGET_THUMB)
30998 {
30999 if (is_called_in_ARM_mode (decl)
31000 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31001 && cfun->is_thunk))
31002 fprintf (stream, "\t.code 32\n");
31003 else if (TARGET_THUMB1)
31004 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31005 else
31006 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31007 }
31008 else
31009 fprintf (stream, "\t.arm\n");
31010
31011 std::string fpu_to_print
31012 = TARGET_SOFT_FLOAT
31013 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31014
31015 if (fpu_to_print != arm_last_printed_fpu_string)
31016 {
31017 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31018 arm_last_printed_fpu_string = fpu_to_print;
31019 }
31020
31021 if (TARGET_POKE_FUNCTION_NAME)
31022 arm_poke_function_name (stream, (const char *) name);
31023 }
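/* Example output (editor's sketch, not part of the original sources): for a
   Thumb-2 function compiled with -march=armv7-a -mfpu=neon this hook would
   typically emit
       .arch armv7-a
       .syntax unified
       .thumb
       .thumb_func
       .fpu neon
   with the .arch and .fpu directives omitted when they would repeat the
   ones most recently printed.  */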
31024
31025 /* If MEM's address is of the form [base + offset], extract the two
31026 parts and store them in BASE and OFFSET; otherwise clear BASE and
31027 OFFSET and return false. */
31028
31029 static bool
31030 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31031 {
31032 rtx addr;
31033
31034 gcc_assert (MEM_P (mem));
31035
31036 addr = XEXP (mem, 0);
31037
31038 /* Strip off const from addresses like (const (addr)). */
31039 if (GET_CODE (addr) == CONST)
31040 addr = XEXP (addr, 0);
31041
31042 if (GET_CODE (addr) == REG)
31043 {
31044 *base = addr;
31045 *offset = const0_rtx;
31046 return true;
31047 }
31048
31049 if (GET_CODE (addr) == PLUS
31050 && GET_CODE (XEXP (addr, 0)) == REG
31051 && CONST_INT_P (XEXP (addr, 1)))
31052 {
31053 *base = XEXP (addr, 0);
31054 *offset = XEXP (addr, 1);
31055 return true;
31056 }
31057
31058 *base = NULL_RTX;
31059 *offset = NULL_RTX;
31060
31061 return false;
31062 }
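/* Illustrative example (editor's note, not part of the original sources):
   for (mem (reg r1)) this stores r1 in BASE and const0_rtx in OFFSET; for
   (mem (plus (reg r1) (const_int 8))) it stores r1 and 8.  Any other
   address form, e.g. an auto-increment, clears both and returns false.  */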
31063
31064 /* If INSN is a load or store whose address has the form [base + offset],
31065 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31066 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
31067 otherwise return FALSE. */
31068
31069 static bool
31070 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31071 {
31072 rtx x, dest, src;
31073
31074 gcc_assert (INSN_P (insn));
31075 x = PATTERN (insn);
31076 if (GET_CODE (x) != SET)
31077 return false;
31078
31079 src = SET_SRC (x);
31080 dest = SET_DEST (x);
31081 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31082 {
31083 *is_load = false;
31084 extract_base_offset_in_addr (dest, base, offset);
31085 }
31086 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31087 {
31088 *is_load = true;
31089 extract_base_offset_in_addr (src, base, offset);
31090 }
31091 else
31092 return false;
31093
31094 return (*base != NULL_RTX && *offset != NULL_RTX);
31095 }
31096
31097 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31098
31099 Currently we only support fusing ldr and str instructions, so FUSION_PRI and
31100 PRI are only calculated for those instructions.  For any other instruction,
31101 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
31102 instruction fusion can be supported by returning different priorities.
31103
31104 It's important that irrelevant instructions get the largest FUSION_PRI. */
31105
31106 static void
31107 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31108 int *fusion_pri, int *pri)
31109 {
31110 int tmp, off_val;
31111 bool is_load;
31112 rtx base, offset;
31113
31114 gcc_assert (INSN_P (insn));
31115
31116 tmp = max_pri - 1;
31117 if (!fusion_load_store (insn, &base, &offset, &is_load))
31118 {
31119 *pri = tmp;
31120 *fusion_pri = tmp;
31121 return;
31122 }
31123
31124 /* Load goes first. */
31125 if (is_load)
31126 *fusion_pri = tmp - 1;
31127 else
31128 *fusion_pri = tmp - 2;
31129
31130 tmp /= 2;
31131
31132 /* INSN with smaller base register goes first. */
31133 tmp -= ((REGNO (base) & 0xff) << 20);
31134
31135 /* INSN with smaller offset goes first. */
31136 off_val = (int)(INTVAL (offset));
31137 if (off_val >= 0)
31138 tmp -= (off_val & 0xfffff);
31139 else
31140 tmp += ((- off_val) & 0xfffff);
31141
31142 *pri = tmp;
31143 return;
31144 }
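/* Editor's note (illustrative, not part of the original sources): the effect
   of the scheme above is that unrelated instructions keep the largest
   priority (max_pri - 1), loads and stores fall into two separate fusion
   groups, and within a group an access with a smaller base register number,
   and then a smaller offset, gets a larger PRI and is placed earlier.  That
   brings accesses such as [r0, #0] and [r0, #4] next to each other so later
   passes can combine them.  */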
31145
31146
31147 /* Construct and return a PARALLEL RTX vector with elements numbering the
31148 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31149 the vector, from the perspective of the architecture.  This does not
31150 line up with GCC's perspective on lane numbers, so we end up with
31151 different masks depending on the target's endianness.  The diagram
31152 below may help.  We must draw the distinction when building masks
31153 which select one half of the vector.  An instruction selecting
31154 architectural low-lanes for a big-endian target must be described using
31155 a mask selecting GCC high-lanes.
31156
31157 Big-Endian Little-Endian
31158
31159 GCC 0 1 2 3 3 2 1 0
31160 | x | x | x | x | | x | x | x | x |
31161 Architecture 3 2 1 0 3 2 1 0
31162
31163 Low Mask: { 2, 3 } { 0, 1 }
31164 High Mask: { 0, 1 } { 2, 3 }
31165 */
31166
31167 rtx
31168 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31169 {
31170 int nunits = GET_MODE_NUNITS (mode);
31171 rtvec v = rtvec_alloc (nunits / 2);
31172 int high_base = nunits / 2;
31173 int low_base = 0;
31174 int base;
31175 rtx t1;
31176 int i;
31177
31178 if (BYTES_BIG_ENDIAN)
31179 base = high ? low_base : high_base;
31180 else
31181 base = high ? high_base : low_base;
31182
31183 for (i = 0; i < nunits / 2; i++)
31184 RTVEC_ELT (v, i) = GEN_INT (base + i);
31185
31186 t1 = gen_rtx_PARALLEL (mode, v);
31187 return t1;
31188 }
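/* Worked example (editor's note, not part of the original sources): for
   V4SImode, nunits == 4, so low_base == 0 and high_base == 2.  On a
   little-endian target HIGH == true gives (parallel [2 3]) and HIGH == false
   gives (parallel [0 1]); on big-endian the two results are swapped, in line
   with the Low/High mask table in the comment above.  */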
31189
31190 /* Check OP for validity as a PARALLEL RTX vector with elements
31191 numbering the lanes of either the high (HIGH == TRUE) or the low
31192 (HIGH == FALSE) half of the vector, from the perspective of the architecture.
31193 See the diagram above arm_simd_vect_par_cnst_half for more details. */
31194
31195 bool
31196 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31197 bool high)
31198 {
31199 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31200 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31201 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31202 int i = 0;
31203
31204 if (!VECTOR_MODE_P (mode))
31205 return false;
31206
31207 if (count_op != count_ideal)
31208 return false;
31209
31210 for (i = 0; i < count_ideal; i++)
31211 {
31212 rtx elt_op = XVECEXP (op, 0, i);
31213 rtx elt_ideal = XVECEXP (ideal, 0, i);
31214
31215 if (!CONST_INT_P (elt_op)
31216 || INTVAL (elt_ideal) != INTVAL (elt_op))
31217 return false;
31218 }
31219 return true;
31220 }
31221
31222 /* We can output an mi_thunk for all cases except when VCALL_OFFSET is
31223 non-zero in Thumb-1 mode. */
31224 static bool
31225 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31226 const_tree)
31227 {
31228 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31229 if (vcall_offset && TARGET_THUMB1)
31230 return false;
31231
31232 /* Otherwise ok. */
31233 return true;
31234 }
31235
31236 /* Generate RTL for a conditional branch with rtx comparison CODE in
31237 mode CC_MODE. The destination of the unlikely conditional branch
31238 is LABEL_REF. */
31239
31240 void
31241 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31242 rtx label_ref)
31243 {
31244 rtx x;
31245 x = gen_rtx_fmt_ee (code, VOIDmode,
31246 gen_rtx_REG (cc_mode, CC_REGNUM),
31247 const0_rtx);
31248
31249 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31250 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31251 pc_rtx);
31252 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31253 }
31254
31255 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31256
31257 For pure-code sections there is no letter code for this attribute, so
31258 output all the section flags numerically when this is needed. */
31259
31260 static bool
31261 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31262 {
31263
31264 if (flags & SECTION_ARM_PURECODE)
31265 {
31266 *num = 0x20000000;
31267
31268 if (!(flags & SECTION_DEBUG))
31269 *num |= 0x2;
31270 if (flags & SECTION_EXCLUDE)
31271 *num |= 0x80000000;
31272 if (flags & SECTION_WRITE)
31273 *num |= 0x1;
31274 if (flags & SECTION_CODE)
31275 *num |= 0x4;
31276 if (flags & SECTION_MERGE)
31277 *num |= 0x10;
31278 if (flags & SECTION_STRINGS)
31279 *num |= 0x20;
31280 if (flags & SECTION_TLS)
31281 *num |= 0x400;
31282 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31283 *num |= 0x200;
31284
31285 return true;
31286 }
31287
31288 return false;
31289 }
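/* Worked example (editor's note, not part of the original sources): a
   pure-code section that is allocated, executable, not writable and not a
   debug section gets *NUM == 0x20000000 | 0x2 | 0x4 == 0x20000006, i.e.
   SHF_ARM_PURECODE plus the numeric forms of the SHF_ALLOC and
   SHF_EXECINSTR flags.  */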
31290
31291 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31292
31293 If pure-code is passed as an option, make sure all functions are in
31294 sections that have the SHF_ARM_PURECODE attribute. */
31295
31296 static section *
31297 arm_function_section (tree decl, enum node_frequency freq,
31298 bool startup, bool exit)
31299 {
31300 const char * section_name;
31301 section * sec;
31302
31303 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31304 return default_function_section (decl, freq, startup, exit);
31305
31306 if (!target_pure_code)
31307 return default_function_section (decl, freq, startup, exit);
31308
31309
31310 section_name = DECL_SECTION_NAME (decl);
31311
31312 /* If a function is not in a named section then it falls under the 'default'
31313 text section, also known as '.text'. We can preserve previous behavior as
31314 the default text section already has the SHF_ARM_PURECODE section
31315 attribute. */
31316 if (!section_name)
31317 {
31318 section *default_sec = default_function_section (decl, freq, startup,
31319 exit);
31320
31321 /* If default_sec is not null, then it must be a special section like for
31322 example .text.startup. We set the pure-code attribute and return the
31323 same section to preserve existing behavior. */
31324 if (default_sec)
31325 default_sec->common.flags |= SECTION_ARM_PURECODE;
31326 return default_sec;
31327 }
31328
31329 /* Otherwise look whether a section has already been created with
31330 'section_name'. */
31331 sec = get_named_section (decl, section_name, 0);
31332 if (!sec)
31333 /* If that is not the case passing NULL as the section's name to
31334 'get_named_section' will create a section with the declaration's
31335 section name. */
31336 sec = get_named_section (decl, NULL, 0);
31337
31338 /* Set the SHF_ARM_PURECODE attribute. */
31339 sec->common.flags |= SECTION_ARM_PURECODE;
31340
31341 return sec;
31342 }
31343
31344 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31345
31346 If DECL is a function declaration and pure-code is passed as an option
31347 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31348 section's name and RELOC indicates whether the declaration's initializer may
31349 contain runtime relocations. */
31350
31351 static unsigned int
31352 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31353 {
31354 unsigned int flags = default_section_type_flags (decl, name, reloc);
31355
31356 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31357 flags |= SECTION_ARM_PURECODE;
31358
31359 return flags;
31360 }
31361
31362 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31363
31364 static void
31365 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31366 rtx op0, rtx op1,
31367 rtx *quot_p, rtx *rem_p)
31368 {
31369 if (mode == SImode)
31370 gcc_assert (!TARGET_IDIV);
31371
31372 scalar_int_mode libval_mode
31373 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31374
31375 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31376 libval_mode,
31377 op0, GET_MODE (op0),
31378 op1, GET_MODE (op1));
31379
31380 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31381 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31382 GET_MODE_SIZE (mode));
31383
31384 gcc_assert (quotient);
31385 gcc_assert (remainder);
31386
31387 *quot_p = quotient;
31388 *rem_p = remainder;
31389 }
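/* Worked example (editor's note, not part of the original sources): for
   MODE == SImode this emits a call to __aeabi_idivmod or __aeabi_uidivmod,
   LIBVAL_MODE is DImode, and the combined 64-bit result is split with
   simplify_gen_subreg: the quotient is the subreg at byte offset 0 and the
   remainder the subreg at byte offset 4 (GET_MODE_SIZE (SImode)).  */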
31390
31391 /* This function checks for the availability of the coprocessor builtin passed
31392 in BUILTIN for the current target.  Returns true if it is available and
31393 false otherwise.  If a BUILTIN is passed for which this function has not
31394 been implemented, it will cause an internal compiler error. */
31395
31396 bool
31397 arm_coproc_builtin_available (enum unspecv builtin)
31398 {
31399 /* None of these builtins are available in Thumb mode if the target only
31400 supports Thumb-1. */
31401 if (TARGET_THUMB1)
31402 return false;
31403
31404 switch (builtin)
31405 {
31406 case VUNSPEC_CDP:
31407 case VUNSPEC_LDC:
31408 case VUNSPEC_LDCL:
31409 case VUNSPEC_STC:
31410 case VUNSPEC_STCL:
31411 case VUNSPEC_MCR:
31412 case VUNSPEC_MRC:
31413 if (arm_arch4)
31414 return true;
31415 break;
31416 case VUNSPEC_CDP2:
31417 case VUNSPEC_LDC2:
31418 case VUNSPEC_LDC2L:
31419 case VUNSPEC_STC2:
31420 case VUNSPEC_STC2L:
31421 case VUNSPEC_MCR2:
31422 case VUNSPEC_MRC2:
31423 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31424 ARMv8-{A,M}. */
31425 if (arm_arch5t)
31426 return true;
31427 break;
31428 case VUNSPEC_MCRR:
31429 case VUNSPEC_MRRC:
31430 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31431 ARMv8-{A,M}. */
31432 if (arm_arch6 || arm_arch5te)
31433 return true;
31434 break;
31435 case VUNSPEC_MCRR2:
31436 case VUNSPEC_MRRC2:
31437 if (arm_arch6)
31438 return true;
31439 break;
31440 default:
31441 gcc_unreachable ();
31442 }
31443 return false;
31444 }
31445
31446 /* This function returns true if OP is a valid memory operand for the ldc and
31447 stc coprocessor instructions and false otherwise. */
31448
31449 bool
31450 arm_coproc_ldc_stc_legitimate_address (rtx op)
31451 {
31452 HOST_WIDE_INT range;
31453 /* Has to be a memory operand. */
31454 if (!MEM_P (op))
31455 return false;
31456
31457 op = XEXP (op, 0);
31458
31459 /* We accept registers. */
31460 if (REG_P (op))
31461 return true;
31462
31463 switch (GET_CODE (op))
31464 {
31465 case PLUS:
31466 {
31467 /* Or registers with an offset. */
31468 if (!REG_P (XEXP (op, 0)))
31469 return false;
31470
31471 op = XEXP (op, 1);
31472
31473 /* The offset must be an immediate though. */
31474 if (!CONST_INT_P (op))
31475 return false;
31476
31477 range = INTVAL (op);
31478
31479 /* Within the range of [-1020,1020]. */
31480 if (!IN_RANGE (range, -1020, 1020))
31481 return false;
31482
31483 /* And a multiple of 4. */
31484 return (range % 4) == 0;
31485 }
31486 case PRE_INC:
31487 case POST_INC:
31488 case PRE_DEC:
31489 case POST_DEC:
31490 return REG_P (XEXP (op, 0));
31491 default:
31492 gcc_unreachable ();
31493 }
31494 return false;
31495 }
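/* Illustrative examples (editor's note, not part of the original sources):
   (mem (reg r2)), (mem (plus (reg r2) (const_int 1016))) and
   (mem (post_inc (reg r2))) are all accepted above, whereas an offset of
   1022 is rejected for not being a multiple of four and an offset of 1024
   for being outside the [-1020, 1020] range.  */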
31496
31497 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31498
31499 In VFPv1, VFP registers could only be accessed in the mode they were
31500 set, so subregs would be invalid there. However, we don't support
31501 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31502
31503 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31504 VFP registers in little-endian order. We can't describe that accurately to
31505 GCC, so avoid taking subregs of such values.
31506
31507 The only exception is going from a 128-bit to a 64-bit type. In that
31508 case the data layout happens to be consistent for big-endian, so we
31509 explicitly allow that case. */
31510
31511 static bool
31512 arm_can_change_mode_class (machine_mode from, machine_mode to,
31513 reg_class_t rclass)
31514 {
31515 if (TARGET_BIG_END
31516 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31517 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31518 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31519 && reg_classes_intersect_p (VFP_REGS, rclass))
31520 return false;
31521 return true;
31522 }
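/* Illustrative example (editor's note, not part of the original sources): on
   a big-endian target, taking an SImode subreg of a DFmode value that may
   live in a VFP register is rejected here, because the two words are stored
   in the opposite order from what GCC's subreg model expects, while the
   128-bit to 64-bit case (e.g. V2DImode to DImode) is explicitly allowed.  */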
31523
31524 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31525 strcpy from constants will be faster. */
31526
31527 static HOST_WIDE_INT
31528 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31529 {
31530 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31531 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31532 return MAX (align, BITS_PER_WORD * factor);
31533 return align;
31534 }
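/* Worked example (editor's note, not part of the original sources): when not
   optimising for size, a string constant gets at least BITS_PER_WORD (32-bit)
   alignment, doubled to 64 bits when tuning for XScale in ARM mode, so that
   word accesses used to copy from the constant stay aligned.  */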
31535
31536 #if CHECKING_P
31537 namespace selftest {
31538
31539 /* Scan the static data tables generated by parsecpu.awk looking for
31540 potential issues with the data. We primarily check for
31541 inconsistencies in the option extensions at present (extensions
31542 that duplicate others but aren't marked as aliases). Furthermore,
31543 for correct canonicalization later options must never be a subset
31544 of an earlier option. Any extension should also only specify other
31545 feature bits and never an architecture bit. The architecture is inferred
31546 from the declaration of the extension. */
31547 static void
31548 arm_test_cpu_arch_data (void)
31549 {
31550 const arch_option *arch;
31551 const cpu_option *cpu;
31552 auto_sbitmap target_isa (isa_num_bits);
31553 auto_sbitmap isa1 (isa_num_bits);
31554 auto_sbitmap isa2 (isa_num_bits);
31555
31556 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31557 {
31558 const cpu_arch_extension *ext1, *ext2;
31559
31560 if (arch->common.extensions == NULL)
31561 continue;
31562
31563 arm_initialize_isa (target_isa, arch->common.isa_bits);
31564
31565 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31566 {
31567 if (ext1->alias)
31568 continue;
31569
31570 arm_initialize_isa (isa1, ext1->isa_bits);
31571 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31572 {
31573 if (ext2->alias || ext1->remove != ext2->remove)
31574 continue;
31575
31576 arm_initialize_isa (isa2, ext2->isa_bits);
31577 /* If the option is a subset of the parent option, it doesn't
31578 add anything and so isn't useful. */
31579 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31580
31581 /* If the extension specifies any architectural bits then
31582 disallow it. Extensions should only specify feature bits. */
31583 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31584 }
31585 }
31586 }
31587
31588 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31589 {
31590 const cpu_arch_extension *ext1, *ext2;
31591
31592 if (cpu->common.extensions == NULL)
31593 continue;
31594
31595 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31596
31597 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31598 {
31599 if (ext1->alias)
31600 continue;
31601
31602 arm_initialize_isa (isa1, ext1->isa_bits);
31603 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31604 {
31605 if (ext2->alias || ext1->remove != ext2->remove)
31606 continue;
31607
31608 arm_initialize_isa (isa2, ext2->isa_bits);
31609 /* If the option is a subset of the parent option, it doesn't
31610 add anything and so isn't useful. */
31611 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31612
31613 /* If the extension specifies any architectural bits then
31614 disallow it. Extensions should only specify feature bits. */
31615 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31616 }
31617 }
31618 }
31619 }
31620
31621 /* Scan the static data tables generated by parsecpu.awk looking for
31622 potential issues with the data.  Here we check the consistency of the
31623 FPU feature bits; in particular we check that ISA_ALL_FPU_INTERNAL does not
31624 contain a feature bit that is not set by any FPU. */
31625 static void
31626 arm_test_fpu_data (void)
31627 {
31628 auto_sbitmap isa_all_fpubits (isa_num_bits);
31629 auto_sbitmap fpubits (isa_num_bits);
31630 auto_sbitmap tmpset (isa_num_bits);
31631
31632 static const enum isa_feature fpu_bitlist[]
31633 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31634 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31635
31636 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31637 {
31638 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31639 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31640 bitmap_clear (isa_all_fpubits);
31641 bitmap_copy (isa_all_fpubits, tmpset);
31642 }
31643
31644 if (!bitmap_empty_p (isa_all_fpubits))
31645 {
31646 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31647 " group that are not defined by any FPU.\n"
31648 " Check your arm-cpus.in.\n");
31649 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31650 }
31651 }
31652
31653 static void
31654 arm_run_selftests (void)
31655 {
31656 arm_test_cpu_arch_data ();
31657 arm_test_fpu_data ();
31658 }
31659 } /* Namespace selftest. */
31660
31661 #undef TARGET_RUN_TARGET_SELFTESTS
31662 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31663 #endif /* CHECKING_P */
31664
31665 struct gcc_target targetm = TARGET_INITIALIZER;
31666
31667 #include "gt-arm.h"