1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 void (*arm_lang_output_object_attributes_hook)(void);
82
83 struct four_ints
84 {
85 int i[4];
86 };
87
88 /* Forward function declarations. */
89 static bool arm_const_not_ok_for_debug_p (rtx);
90 static int arm_needs_doubleword_align (machine_mode, const_tree);
91 static int arm_compute_static_chain_stack_bytes (void);
92 static arm_stack_offsets *arm_get_frame_offsets (void);
93 static void arm_compute_frame_layout (void);
94 static void arm_add_gc_roots (void);
95 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
96 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
97 static unsigned bit_count (unsigned long);
98 static unsigned bitmap_popcount (const sbitmap);
99 static int arm_address_register_rtx_p (rtx, int);
100 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
101 static bool is_called_in_ARM_mode (tree);
102 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
103 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
104 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
105 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
106 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
107 inline static int thumb1_index_register_rtx_p (rtx, int);
108 static int thumb_far_jump_used_p (void);
109 static bool thumb_force_lr_save (void);
110 static unsigned arm_size_return_regs (void);
111 static bool arm_assemble_integer (rtx, unsigned int, int);
112 static void arm_print_operand (FILE *, rtx, int);
113 static void arm_print_operand_address (FILE *, machine_mode, rtx);
114 static bool arm_print_operand_punct_valid_p (unsigned char code);
115 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
116 static arm_cc get_arm_condition_code (rtx);
117 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
118 static const char *output_multi_immediate (rtx *, const char *, const char *,
119 int, HOST_WIDE_INT);
120 static const char *shift_op (rtx, HOST_WIDE_INT *);
121 static struct machine_function *arm_init_machine_status (void);
122 static void thumb_exit (FILE *, int);
123 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
124 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
125 static Mnode *add_minipool_forward_ref (Mfix *);
126 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_backward_ref (Mfix *);
128 static void assign_minipool_offsets (Mfix *);
129 static void arm_print_value (FILE *, rtx);
130 static void dump_minipool (rtx_insn *);
131 static int arm_barrier_cost (rtx_insn *);
132 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
133 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
134 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
135 machine_mode, rtx);
136 static void arm_reorg (void);
137 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
138 static unsigned long arm_compute_save_reg0_reg12_mask (void);
139 static unsigned long arm_compute_save_core_reg_mask (void);
140 static unsigned long arm_isr_value (tree);
141 static unsigned long arm_compute_func_type (void);
142 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
143 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
144 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
145 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
146 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
147 #endif
148 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
149 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
150 static void arm_output_function_epilogue (FILE *);
151 static void arm_output_function_prologue (FILE *);
152 static int arm_comp_type_attributes (const_tree, const_tree);
153 static void arm_set_default_type_attributes (tree);
154 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
155 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
156 static int optimal_immediate_sequence (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence);
159 static int optimal_immediate_sequence_1 (enum rtx_code code,
160 unsigned HOST_WIDE_INT val,
161 struct four_ints *return_sequence,
162 int i);
163 static int arm_get_strip_length (int);
164 static bool arm_function_ok_for_sibcall (tree, tree);
165 static machine_mode arm_promote_function_mode (const_tree,
166 machine_mode, int *,
167 const_tree, int);
168 static bool arm_return_in_memory (const_tree, const_tree);
169 static rtx arm_function_value (const_tree, const_tree, bool);
170 static rtx arm_libcall_value_1 (machine_mode);
171 static rtx arm_libcall_value (machine_mode, const_rtx);
172 static bool arm_function_value_regno_p (const unsigned int);
173 static void arm_internal_label (FILE *, const char *, unsigned long);
174 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
175 tree);
176 static bool arm_have_conditional_execution (void);
177 static bool arm_cannot_force_const_mem (machine_mode, rtx);
178 static bool arm_legitimate_constant_p (machine_mode, rtx);
179 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
180 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
181 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
182 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
183 static void emit_constant_insn (rtx cond, rtx pattern);
184 static rtx_insn *emit_set_insn (rtx, rtx);
185 static rtx emit_multi_reg_push (unsigned long, unsigned long);
186 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
187 tree, bool);
188 static rtx arm_function_arg (cumulative_args_t, machine_mode,
189 const_tree, bool);
190 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
191 const_tree, bool);
192 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
193 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
194 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
195 const_tree);
196 static rtx aapcs_libcall_value (machine_mode);
197 static int aapcs_select_return_coproc (const_tree, const_tree);
198
199 #ifdef OBJECT_FORMAT_ELF
200 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
201 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
202 #endif
203 #ifndef ARM_PE
204 static void arm_encode_section_info (tree, rtx, int);
205 #endif
206
207 static void arm_file_end (void);
208 static void arm_file_start (void);
209 static void arm_insert_attributes (tree, tree *);
210
211 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
212 tree, int *, int);
213 static bool arm_pass_by_reference (cumulative_args_t,
214 machine_mode, const_tree, bool);
215 static bool arm_promote_prototypes (const_tree);
216 static bool arm_default_short_enums (void);
217 static bool arm_align_anon_bitfield (void);
218 static bool arm_return_in_msb (const_tree);
219 static bool arm_must_pass_in_stack (machine_mode, const_tree);
220 static bool arm_return_in_memory (const_tree, const_tree);
221 #if ARM_UNWIND_INFO
222 static void arm_unwind_emit (FILE *, rtx_insn *);
223 static bool arm_output_ttype (rtx);
224 static void arm_asm_emit_except_personality (rtx);
225 #endif
226 static void arm_asm_init_sections (void);
227 static rtx arm_dwarf_register_span (rtx);
228
229 static tree arm_cxx_guard_type (void);
230 static bool arm_cxx_guard_mask_bit (void);
231 static tree arm_get_cookie_size (tree);
232 static bool arm_cookie_has_size (void);
233 static bool arm_cxx_cdtor_returns_this (void);
234 static bool arm_cxx_key_method_may_be_inline (void);
235 static void arm_cxx_determine_class_data_visibility (tree);
236 static bool arm_cxx_class_data_always_comdat (void);
237 static bool arm_cxx_use_aeabi_atexit (void);
238 static void arm_init_libfuncs (void);
239 static tree arm_build_builtin_va_list (void);
240 static void arm_expand_builtin_va_start (tree, rtx);
241 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
242 static void arm_option_override (void);
243 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
244 static void arm_option_restore (struct gcc_options *,
245 struct cl_target_option *);
246 static void arm_override_options_after_change (void);
247 static void arm_option_print (FILE *, int, struct cl_target_option *);
248 static void arm_set_current_function (tree);
249 static bool arm_can_inline_p (tree, tree);
250 static void arm_relayout_function (tree);
251 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
252 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
253 static bool arm_sched_can_speculate_insn (rtx_insn *);
254 static bool arm_macro_fusion_p (void);
255 static bool arm_cannot_copy_insn_p (rtx_insn *);
256 static int arm_issue_rate (void);
257 static int arm_first_cycle_multipass_dfa_lookahead (void);
258 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
259 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
260 static bool arm_output_addr_const_extra (FILE *, rtx);
261 static bool arm_allocate_stack_slots_for_args (void);
262 static bool arm_warn_func_return (tree);
263 static tree arm_promoted_type (const_tree t);
264 static bool arm_scalar_mode_supported_p (scalar_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (scalar_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
286 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
287 static unsigned int arm_autovectorize_vector_sizes (void);
288 static int arm_default_branch_cost (bool, bool);
289 static int arm_cortex_a5_branch_cost (bool, bool);
290 static int arm_cortex_m_branch_cost (bool, bool);
291 static int arm_cortex_m7_branch_cost (bool, bool);
292
293 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
294
295 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
296
297 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
298 tree vectype,
299 int misalign ATTRIBUTE_UNUSED);
300 static unsigned arm_add_stmt_cost (void *data, int count,
301 enum vect_cost_for_stmt kind,
302 struct _stmt_vec_info *stmt_info,
303 int misalign,
304 enum vect_cost_model_location where);
305
306 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
307 bool op0_preserve_value);
308 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
309
310 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
311 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
312 const_tree);
313 static section *arm_function_section (tree, enum node_frequency, bool, bool);
314 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
315 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
316 int reloc);
317 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
318 static opt_scalar_float_mode arm_floatn_mode (int, bool);
319 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
320 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
321 static bool arm_modes_tieable_p (machine_mode, machine_mode);
322 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
323 \f
324 /* Table of machine attributes. */
325 static const struct attribute_spec arm_attribute_table[] =
326 {
327 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
328 affects_type_identity, handler, exclude } */
329 /* Function calls made to this symbol must be done indirectly, because
330 it may lie outside of the 26 bit addressing range of a normal function
331 call. */
332 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
333 /* Whereas these functions are always known to reside within the 26 bit
334 addressing range. */
335 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
336 /* Specify the procedure call conventions for a function. */
337 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
338 NULL },
339 /* Interrupt Service Routines have special prologue and epilogue requirements. */
340 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
341 NULL },
342 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
343 NULL },
344 { "naked", 0, 0, true, false, false, false,
345 arm_handle_fndecl_attribute, NULL },
346 #ifdef ARM_PE
347 /* ARM/PE has three new attributes:
348 interfacearm - ?
349 dllexport - for exporting a function/variable that will live in a dll
350 dllimport - for importing a function/variable from a dll
351
352 Microsoft allows multiple declspecs in one __declspec, separating
353 them with spaces. We do NOT support this. Instead, use __declspec
354 multiple times.
355 */
356 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
357 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
358 { "interfacearm", 0, 0, true, false, false, false,
359 arm_handle_fndecl_attribute, NULL },
360 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
361 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
362 NULL },
363 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
364 NULL },
365 { "notshared", 0, 0, false, true, false, false,
366 arm_handle_notshared_attribute, NULL },
367 #endif
368 /* ARMv8-M Security Extensions support. */
369 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
370 arm_handle_cmse_nonsecure_entry, NULL },
371 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
372 arm_handle_cmse_nonsecure_call, NULL },
373 { NULL, 0, 0, false, false, false, false, NULL, NULL }
374 };
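/* Illustrative note, not part of the upstream table: at the source level
   the attributes registered above would typically be applied roughly as

     void far_away (void) __attribute__ ((long_call));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     int gateway (int) __attribute__ ((cmse_nonsecure_entry));

   The function names here are hypothetical; the handler functions listed
   in the table validate such uses and attach the attribute to the decl or
   type as appropriate.  */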
375 \f
376 /* Initialize the GCC target structure. */
377 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
378 #undef TARGET_MERGE_DECL_ATTRIBUTES
379 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
380 #endif
381
382 #undef TARGET_LEGITIMIZE_ADDRESS
383 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
384
385 #undef TARGET_ATTRIBUTE_TABLE
386 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
387
388 #undef TARGET_INSERT_ATTRIBUTES
389 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
390
391 #undef TARGET_ASM_FILE_START
392 #define TARGET_ASM_FILE_START arm_file_start
393 #undef TARGET_ASM_FILE_END
394 #define TARGET_ASM_FILE_END arm_file_end
395
396 #undef TARGET_ASM_ALIGNED_SI_OP
397 #define TARGET_ASM_ALIGNED_SI_OP NULL
398 #undef TARGET_ASM_INTEGER
399 #define TARGET_ASM_INTEGER arm_assemble_integer
400
401 #undef TARGET_PRINT_OPERAND
402 #define TARGET_PRINT_OPERAND arm_print_operand
403 #undef TARGET_PRINT_OPERAND_ADDRESS
404 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
405 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
406 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
407
408 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
409 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
410
411 #undef TARGET_ASM_FUNCTION_PROLOGUE
412 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
413
414 #undef TARGET_ASM_FUNCTION_EPILOGUE
415 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
416
417 #undef TARGET_CAN_INLINE_P
418 #define TARGET_CAN_INLINE_P arm_can_inline_p
419
420 #undef TARGET_RELAYOUT_FUNCTION
421 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
422
423 #undef TARGET_OPTION_OVERRIDE
424 #define TARGET_OPTION_OVERRIDE arm_option_override
425
426 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
427 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
428
429 #undef TARGET_OPTION_SAVE
430 #define TARGET_OPTION_SAVE arm_option_save
431
432 #undef TARGET_OPTION_RESTORE
433 #define TARGET_OPTION_RESTORE arm_option_restore
434
435 #undef TARGET_OPTION_PRINT
436 #define TARGET_OPTION_PRINT arm_option_print
437
438 #undef TARGET_COMP_TYPE_ATTRIBUTES
439 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
440
441 #undef TARGET_SCHED_CAN_SPECULATE_INSN
442 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
443
444 #undef TARGET_SCHED_MACRO_FUSION_P
445 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
446
447 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
448 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
449
450 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
451 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
452
453 #undef TARGET_SCHED_ADJUST_COST
454 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
455
456 #undef TARGET_SET_CURRENT_FUNCTION
457 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
458
459 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
460 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
461
462 #undef TARGET_SCHED_REORDER
463 #define TARGET_SCHED_REORDER arm_sched_reorder
464
465 #undef TARGET_REGISTER_MOVE_COST
466 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
467
468 #undef TARGET_MEMORY_MOVE_COST
469 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
470
471 #undef TARGET_ENCODE_SECTION_INFO
472 #ifdef ARM_PE
473 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
474 #else
475 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
476 #endif
477
478 #undef TARGET_STRIP_NAME_ENCODING
479 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
480
481 #undef TARGET_ASM_INTERNAL_LABEL
482 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
483
484 #undef TARGET_FLOATN_MODE
485 #define TARGET_FLOATN_MODE arm_floatn_mode
486
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
489
490 #undef TARGET_FUNCTION_VALUE
491 #define TARGET_FUNCTION_VALUE arm_function_value
492
493 #undef TARGET_LIBCALL_VALUE
494 #define TARGET_LIBCALL_VALUE arm_libcall_value
495
496 #undef TARGET_FUNCTION_VALUE_REGNO_P
497 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
498
499 #undef TARGET_ASM_OUTPUT_MI_THUNK
500 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
501 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
502 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
503
504 #undef TARGET_RTX_COSTS
505 #define TARGET_RTX_COSTS arm_rtx_costs
506 #undef TARGET_ADDRESS_COST
507 #define TARGET_ADDRESS_COST arm_address_cost
508
509 #undef TARGET_SHIFT_TRUNCATION_MASK
510 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
511 #undef TARGET_VECTOR_MODE_SUPPORTED_P
512 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
513 #undef TARGET_ARRAY_MODE_SUPPORTED_P
514 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
515 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
516 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
517 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
518 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
519 arm_autovectorize_vector_sizes
520
521 #undef TARGET_MACHINE_DEPENDENT_REORG
522 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
523
524 #undef TARGET_INIT_BUILTINS
525 #define TARGET_INIT_BUILTINS arm_init_builtins
526 #undef TARGET_EXPAND_BUILTIN
527 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
528 #undef TARGET_BUILTIN_DECL
529 #define TARGET_BUILTIN_DECL arm_builtin_decl
530
531 #undef TARGET_INIT_LIBFUNCS
532 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
533
534 #undef TARGET_PROMOTE_FUNCTION_MODE
535 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
536 #undef TARGET_PROMOTE_PROTOTYPES
537 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
538 #undef TARGET_PASS_BY_REFERENCE
539 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
540 #undef TARGET_ARG_PARTIAL_BYTES
541 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
542 #undef TARGET_FUNCTION_ARG
543 #define TARGET_FUNCTION_ARG arm_function_arg
544 #undef TARGET_FUNCTION_ARG_ADVANCE
545 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
546 #undef TARGET_FUNCTION_ARG_PADDING
547 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
548 #undef TARGET_FUNCTION_ARG_BOUNDARY
549 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
550
551 #undef TARGET_SETUP_INCOMING_VARARGS
552 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
553
554 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
555 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
556
557 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
558 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
559 #undef TARGET_TRAMPOLINE_INIT
560 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
561 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
562 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
563
564 #undef TARGET_WARN_FUNC_RETURN
565 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
566
567 #undef TARGET_DEFAULT_SHORT_ENUMS
568 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
569
570 #undef TARGET_ALIGN_ANON_BITFIELD
571 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
572
573 #undef TARGET_NARROW_VOLATILE_BITFIELD
574 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
575
576 #undef TARGET_CXX_GUARD_TYPE
577 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
578
579 #undef TARGET_CXX_GUARD_MASK_BIT
580 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
581
582 #undef TARGET_CXX_GET_COOKIE_SIZE
583 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
584
585 #undef TARGET_CXX_COOKIE_HAS_SIZE
586 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
587
588 #undef TARGET_CXX_CDTOR_RETURNS_THIS
589 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
590
591 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
592 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
593
594 #undef TARGET_CXX_USE_AEABI_ATEXIT
595 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
596
597 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
598 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
599 arm_cxx_determine_class_data_visibility
600
601 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
602 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
603
604 #undef TARGET_RETURN_IN_MSB
605 #define TARGET_RETURN_IN_MSB arm_return_in_msb
606
607 #undef TARGET_RETURN_IN_MEMORY
608 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
609
610 #undef TARGET_MUST_PASS_IN_STACK
611 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
612
613 #if ARM_UNWIND_INFO
614 #undef TARGET_ASM_UNWIND_EMIT
615 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
616
617 /* EABI unwinding tables use a different format for the typeinfo tables. */
618 #undef TARGET_ASM_TTYPE
619 #define TARGET_ASM_TTYPE arm_output_ttype
620
621 #undef TARGET_ARM_EABI_UNWINDER
622 #define TARGET_ARM_EABI_UNWINDER true
623
624 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
625 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
626
627 #endif /* ARM_UNWIND_INFO */
628
629 #undef TARGET_ASM_INIT_SECTIONS
630 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
631
632 #undef TARGET_DWARF_REGISTER_SPAN
633 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
634
635 #undef TARGET_CANNOT_COPY_INSN_P
636 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
637
638 #ifdef HAVE_AS_TLS
639 #undef TARGET_HAVE_TLS
640 #define TARGET_HAVE_TLS true
641 #endif
642
643 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
644 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
645
646 #undef TARGET_LEGITIMATE_CONSTANT_P
647 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
648
649 #undef TARGET_CANNOT_FORCE_CONST_MEM
650 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
651
652 #undef TARGET_MAX_ANCHOR_OFFSET
653 #define TARGET_MAX_ANCHOR_OFFSET 4095
654
655 /* The minimum is set such that the total size of the block
656 for a particular anchor is -4088 + 1 + 4095 bytes, which is
657 divisible by eight, ensuring natural spacing of anchors. */
658 #undef TARGET_MIN_ANCHOR_OFFSET
659 #define TARGET_MIN_ANCHOR_OFFSET -4088
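/* Explanatory note, not from the upstream source: the block size referred
   to above is 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, which is
   the divisibility by eight the comment relies on.  */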
660
661 #undef TARGET_SCHED_ISSUE_RATE
662 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
663
664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
666 arm_first_cycle_multipass_dfa_lookahead
667
668 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
669 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
670 arm_first_cycle_multipass_dfa_lookahead_guard
671
672 #undef TARGET_MANGLE_TYPE
673 #define TARGET_MANGLE_TYPE arm_mangle_type
674
675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
677
678 #undef TARGET_BUILD_BUILTIN_VA_LIST
679 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
680 #undef TARGET_EXPAND_BUILTIN_VA_START
681 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
682 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
683 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
684
685 #ifdef HAVE_AS_TLS
686 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
687 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
688 #endif
689
690 #undef TARGET_LEGITIMATE_ADDRESS_P
691 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
692
693 #undef TARGET_PREFERRED_RELOAD_CLASS
694 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
695
696 #undef TARGET_PROMOTED_TYPE
697 #define TARGET_PROMOTED_TYPE arm_promoted_type
698
699 #undef TARGET_SCALAR_MODE_SUPPORTED_P
700 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
701
702 #undef TARGET_COMPUTE_FRAME_LAYOUT
703 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
704
705 #undef TARGET_FRAME_POINTER_REQUIRED
706 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
707
708 #undef TARGET_CAN_ELIMINATE
709 #define TARGET_CAN_ELIMINATE arm_can_eliminate
710
711 #undef TARGET_CONDITIONAL_REGISTER_USAGE
712 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
713
714 #undef TARGET_CLASS_LIKELY_SPILLED_P
715 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
716
717 #undef TARGET_VECTORIZE_BUILTINS
718 #define TARGET_VECTORIZE_BUILTINS
719
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
722 arm_builtin_vectorized_function
723
724 #undef TARGET_VECTOR_ALIGNMENT
725 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
726
727 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
728 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
729 arm_vector_alignment_reachable
730
731 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
732 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
733 arm_builtin_support_vector_misalignment
734
735 #undef TARGET_PREFERRED_RENAME_CLASS
736 #define TARGET_PREFERRED_RENAME_CLASS \
737 arm_preferred_rename_class
738
739 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
740 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
741 arm_vectorize_vec_perm_const_ok
742
743 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
744 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
745 arm_builtin_vectorization_cost
746 #undef TARGET_VECTORIZE_ADD_STMT_COST
747 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
748
749 #undef TARGET_CANONICALIZE_COMPARISON
750 #define TARGET_CANONICALIZE_COMPARISON \
751 arm_canonicalize_comparison
752
753 #undef TARGET_ASAN_SHADOW_OFFSET
754 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
755
756 #undef MAX_INSN_PER_IT_BLOCK
757 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
758
759 #undef TARGET_CAN_USE_DOLOOP_P
760 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
761
762 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
763 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
764
765 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
766 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
767
768 #undef TARGET_SCHED_FUSION_PRIORITY
769 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
770
771 #undef TARGET_ASM_FUNCTION_SECTION
772 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
773
774 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
775 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
776
777 #undef TARGET_SECTION_TYPE_FLAGS
778 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
779
780 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
781 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
782
783 #undef TARGET_C_EXCESS_PRECISION
784 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
785
786 /* Although the architecture reserves bits 0 and 1, only the former is
787 used for ARM/Thumb ISA selection in v7 and earlier versions. */
788 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
789 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
790
791 #undef TARGET_FIXED_CONDITION_CODE_REGS
792 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
793
794 #undef TARGET_HARD_REGNO_NREGS
795 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
796 #undef TARGET_HARD_REGNO_MODE_OK
797 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
798
799 #undef TARGET_MODES_TIEABLE_P
800 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
801
802 #undef TARGET_CAN_CHANGE_MODE_CLASS
803 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
804
805 #undef TARGET_CONSTANT_ALIGNMENT
806 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
807 \f
808 /* Obstack for minipool constant handling. */
809 static struct obstack minipool_obstack;
810 static char * minipool_startobj;
811
812 /* The maximum number of insns skipped which
813 will be conditionalised if possible. */
814 static int max_insns_skipped = 5;
815
816 extern FILE * asm_out_file;
817
818 /* True if we are currently building a constant table. */
819 int making_const_table;
820
821 /* The processor for which instructions should be scheduled. */
822 enum processor_type arm_tune = TARGET_CPU_arm_none;
823
824 /* The current tuning set. */
825 const struct tune_params *current_tune;
826
827 /* Which floating point hardware to schedule for. */
828 int arm_fpu_attr;
829
830 /* Used for Thumb call_via trampolines. */
831 rtx thumb_call_via_label[14];
832 static int thumb_call_reg_needed;
833
834 /* The bits in this mask specify which instruction scheduling options should
835 be used. */
836 unsigned int tune_flags = 0;
837
838 /* The highest ARM architecture version supported by the
839 target. */
840 enum base_architecture arm_base_arch = BASE_ARCH_0;
841
842 /* Active target architecture and tuning. */
843
844 struct arm_build_target arm_active_target;
845
846 /* The following are used in the arm.md file as equivalents to bits
847 in the above two flag variables. */
848
849 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
850 int arm_arch3m = 0;
851
852 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
853 int arm_arch4 = 0;
854
855 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
856 int arm_arch4t = 0;
857
858 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
859 int arm_arch5 = 0;
860
861 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
862 int arm_arch5e = 0;
863
864 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
865 int arm_arch5te = 0;
866
867 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
868 int arm_arch6 = 0;
869
870 /* Nonzero if this chip supports the ARM 6K extensions. */
871 int arm_arch6k = 0;
872
873 /* Nonzero if this chip supports the ARM 6KZ extensions. */
874 int arm_arch6kz = 0;
875
876 /* Nonzero if instructions present in ARMv6-M can be used. */
877 int arm_arch6m = 0;
878
879 /* Nonzero if this chip supports the ARM 7 extensions. */
880 int arm_arch7 = 0;
881
882 /* Nonzero if this chip supports the Large Physical Address Extension. */
883 int arm_arch_lpae = 0;
884
885 /* Nonzero if instructions not present in the 'M' profile can be used. */
886 int arm_arch_notm = 0;
887
888 /* Nonzero if instructions present in ARMv7E-M can be used. */
889 int arm_arch7em = 0;
890
891 /* Nonzero if instructions present in ARMv8 can be used. */
892 int arm_arch8 = 0;
893
894 /* Nonzero if this chip supports the ARMv8.1 extensions. */
895 int arm_arch8_1 = 0;
896
897 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
898 int arm_arch8_2 = 0;
899
900 /* Nonzero if this chip supports the FP16 instructions extension of ARM
901 Architecture 8.2. */
902 int arm_fp16_inst = 0;
903
904 /* Nonzero if this chip can benefit from load scheduling. */
905 int arm_ld_sched = 0;
906
907 /* Nonzero if this chip is a StrongARM. */
908 int arm_tune_strongarm = 0;
909
910 /* Nonzero if this chip supports Intel Wireless MMX technology. */
911 int arm_arch_iwmmxt = 0;
912
913 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
914 int arm_arch_iwmmxt2 = 0;
915
916 /* Nonzero if this chip is an XScale. */
917 int arm_arch_xscale = 0;
918
919 /* Nonzero if tuning for XScale */
920 int arm_tune_xscale = 0;
921
922 /* Nonzero if we want to tune for stores that access the write-buffer.
923 This typically means an ARM6 or ARM7 with MMU or MPU. */
924 int arm_tune_wbuf = 0;
925
926 /* Nonzero if tuning for Cortex-A9. */
927 int arm_tune_cortex_a9 = 0;
928
929 /* Nonzero if we should define __THUMB_INTERWORK__ in the
930 preprocessor.
931 XXX This is a bit of a hack; it's intended to help work around
932 problems in GLD, which doesn't understand that armv5t code is
933 interworking clean. */
934 int arm_cpp_interwork = 0;
935
936 /* Nonzero if chip supports Thumb 1. */
937 int arm_arch_thumb1;
938
939 /* Nonzero if chip supports Thumb 2. */
940 int arm_arch_thumb2;
941
942 /* Nonzero if chip supports integer division instruction. */
943 int arm_arch_arm_hwdiv;
944 int arm_arch_thumb_hwdiv;
945
946 /* Nonzero if chip disallows volatile memory access in IT block. */
947 int arm_arch_no_volatile_ce;
948
949 /* Nonzero if we should use Neon to handle 64-bit operations rather
950 than core registers. */
951 int prefer_neon_for_64bits = 0;
952
953 /* Nonzero if we shouldn't use literal pools. */
954 bool arm_disable_literal_pool = false;
955
956 /* The register number to be used for the PIC offset register. */
957 unsigned arm_pic_register = INVALID_REGNUM;
958
959 enum arm_pcs arm_pcs_default;
960
961 /* For an explanation of these variables, see final_prescan_insn below. */
962 int arm_ccfsm_state;
963 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
964 enum arm_cond_code arm_current_cc;
965
966 rtx arm_target_insn;
967 int arm_target_label;
968 /* The number of conditionally executed insns, including the current insn. */
969 int arm_condexec_count = 0;
970 /* A bitmask specifying the patterns for the IT block.
971 Zero means do not output an IT block before this insn. */
972 int arm_condexec_mask = 0;
973 /* The number of bits used in arm_condexec_mask. */
974 int arm_condexec_masklen = 0;
975
976 /* Nonzero if chip supports the ARMv8 CRC instructions. */
977 int arm_arch_crc = 0;
978
979 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
980 int arm_arch_dotprod = 0;
981
982 /* Nonzero if chip supports the ARMv8-M security extensions. */
983 int arm_arch_cmse = 0;
984
985 /* Nonzero if the core has a very small, high-latency multiply unit. */
986 int arm_m_profile_small_mul = 0;
987
988 /* The condition codes of the ARM, and the inverse function. */
989 static const char * const arm_condition_codes[] =
990 {
991 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
992 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
993 };
994
995 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
996 int arm_regs_in_sequence[] =
997 {
998 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
999 };
1000
1001 #define ARM_LSL_NAME "lsl"
1002 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1003
1004 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1005 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1006 | (1 << PIC_OFFSET_TABLE_REGNUM)))
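/* Explanatory note, not from the upstream source: the 0xff mask limits the
   candidate work registers to the low registers r0-r7; the Thumb frame
   pointer and PIC register bits are then cleared explicitly, while the SP
   and PC bits already lie outside the low-register range.  */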
1007 \f
1008 /* Initialization code. */
1009
1010 struct cpu_tune
1011 {
1012 enum processor_type scheduler;
1013 unsigned int tune_flags;
1014 const struct tune_params *tune;
1015 };
1016
1017 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1018 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1019 { \
1020 num_slots, \
1021 l1_size, \
1022 l1_line_size \
1023 }
1024
1025 /* arm generic vectorizer costs. */
1026 static const
1027 struct cpu_vec_costs arm_default_vec_cost = {
1028 1, /* scalar_stmt_cost. */
1029 1, /* scalar load_cost. */
1030 1, /* scalar_store_cost. */
1031 1, /* vec_stmt_cost. */
1032 1, /* vec_to_scalar_cost. */
1033 1, /* scalar_to_vec_cost. */
1034 1, /* vec_align_load_cost. */
1035 1, /* vec_unalign_load_cost. */
1036 1, /* vec_unalign_store_cost. */
1037 1, /* vec_store_cost. */
1038 3, /* cond_taken_branch_cost. */
1039 1, /* cond_not_taken_branch_cost. */
1040 };
1041
1042 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1043 #include "aarch-cost-tables.h"
1044
1045
1046
1047 const struct cpu_cost_table cortexa9_extra_costs =
1048 {
1049 /* ALU */
1050 {
1051 0, /* arith. */
1052 0, /* logical. */
1053 0, /* shift. */
1054 COSTS_N_INSNS (1), /* shift_reg. */
1055 COSTS_N_INSNS (1), /* arith_shift. */
1056 COSTS_N_INSNS (2), /* arith_shift_reg. */
1057 0, /* log_shift. */
1058 COSTS_N_INSNS (1), /* log_shift_reg. */
1059 COSTS_N_INSNS (1), /* extend. */
1060 COSTS_N_INSNS (2), /* extend_arith. */
1061 COSTS_N_INSNS (1), /* bfi. */
1062 COSTS_N_INSNS (1), /* bfx. */
1063 0, /* clz. */
1064 0, /* rev. */
1065 0, /* non_exec. */
1066 true /* non_exec_costs_exec. */
1067 },
1068 {
1069 /* MULT SImode */
1070 {
1071 COSTS_N_INSNS (3), /* simple. */
1072 COSTS_N_INSNS (3), /* flag_setting. */
1073 COSTS_N_INSNS (2), /* extend. */
1074 COSTS_N_INSNS (3), /* add. */
1075 COSTS_N_INSNS (2), /* extend_add. */
1076 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1077 },
1078 /* MULT DImode */
1079 {
1080 0, /* simple (N/A). */
1081 0, /* flag_setting (N/A). */
1082 COSTS_N_INSNS (4), /* extend. */
1083 0, /* add (N/A). */
1084 COSTS_N_INSNS (4), /* extend_add. */
1085 0 /* idiv (N/A). */
1086 }
1087 },
1088 /* LD/ST */
1089 {
1090 COSTS_N_INSNS (2), /* load. */
1091 COSTS_N_INSNS (2), /* load_sign_extend. */
1092 COSTS_N_INSNS (2), /* ldrd. */
1093 COSTS_N_INSNS (2), /* ldm_1st. */
1094 1, /* ldm_regs_per_insn_1st. */
1095 2, /* ldm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (5), /* loadf. */
1097 COSTS_N_INSNS (5), /* loadd. */
1098 COSTS_N_INSNS (1), /* load_unaligned. */
1099 COSTS_N_INSNS (2), /* store. */
1100 COSTS_N_INSNS (2), /* strd. */
1101 COSTS_N_INSNS (2), /* stm_1st. */
1102 1, /* stm_regs_per_insn_1st. */
1103 2, /* stm_regs_per_insn_subsequent. */
1104 COSTS_N_INSNS (1), /* storef. */
1105 COSTS_N_INSNS (1), /* stored. */
1106 COSTS_N_INSNS (1), /* store_unaligned. */
1107 COSTS_N_INSNS (1), /* loadv. */
1108 COSTS_N_INSNS (1) /* storev. */
1109 },
1110 {
1111 /* FP SFmode */
1112 {
1113 COSTS_N_INSNS (14), /* div. */
1114 COSTS_N_INSNS (4), /* mult. */
1115 COSTS_N_INSNS (7), /* mult_addsub. */
1116 COSTS_N_INSNS (30), /* fma. */
1117 COSTS_N_INSNS (3), /* addsub. */
1118 COSTS_N_INSNS (1), /* fpconst. */
1119 COSTS_N_INSNS (1), /* neg. */
1120 COSTS_N_INSNS (3), /* compare. */
1121 COSTS_N_INSNS (3), /* widen. */
1122 COSTS_N_INSNS (3), /* narrow. */
1123 COSTS_N_INSNS (3), /* toint. */
1124 COSTS_N_INSNS (3), /* fromint. */
1125 COSTS_N_INSNS (3) /* roundint. */
1126 },
1127 /* FP DFmode */
1128 {
1129 COSTS_N_INSNS (24), /* div. */
1130 COSTS_N_INSNS (5), /* mult. */
1131 COSTS_N_INSNS (8), /* mult_addsub. */
1132 COSTS_N_INSNS (30), /* fma. */
1133 COSTS_N_INSNS (3), /* addsub. */
1134 COSTS_N_INSNS (1), /* fpconst. */
1135 COSTS_N_INSNS (1), /* neg. */
1136 COSTS_N_INSNS (3), /* compare. */
1137 COSTS_N_INSNS (3), /* widen. */
1138 COSTS_N_INSNS (3), /* narrow. */
1139 COSTS_N_INSNS (3), /* toint. */
1140 COSTS_N_INSNS (3), /* fromint. */
1141 COSTS_N_INSNS (3) /* roundint. */
1142 }
1143 },
1144 /* Vector */
1145 {
1146 COSTS_N_INSNS (1) /* alu. */
1147 }
1148 };
1149
1150 const struct cpu_cost_table cortexa8_extra_costs =
1151 {
1152 /* ALU */
1153 {
1154 0, /* arith. */
1155 0, /* logical. */
1156 COSTS_N_INSNS (1), /* shift. */
1157 0, /* shift_reg. */
1158 COSTS_N_INSNS (1), /* arith_shift. */
1159 0, /* arith_shift_reg. */
1160 COSTS_N_INSNS (1), /* log_shift. */
1161 0, /* log_shift_reg. */
1162 0, /* extend. */
1163 0, /* extend_arith. */
1164 0, /* bfi. */
1165 0, /* bfx. */
1166 0, /* clz. */
1167 0, /* rev. */
1168 0, /* non_exec. */
1169 true /* non_exec_costs_exec. */
1170 },
1171 {
1172 /* MULT SImode */
1173 {
1174 COSTS_N_INSNS (1), /* simple. */
1175 COSTS_N_INSNS (1), /* flag_setting. */
1176 COSTS_N_INSNS (1), /* extend. */
1177 COSTS_N_INSNS (1), /* add. */
1178 COSTS_N_INSNS (1), /* extend_add. */
1179 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1180 },
1181 /* MULT DImode */
1182 {
1183 0, /* simple (N/A). */
1184 0, /* flag_setting (N/A). */
1185 COSTS_N_INSNS (2), /* extend. */
1186 0, /* add (N/A). */
1187 COSTS_N_INSNS (2), /* extend_add. */
1188 0 /* idiv (N/A). */
1189 }
1190 },
1191 /* LD/ST */
1192 {
1193 COSTS_N_INSNS (1), /* load. */
1194 COSTS_N_INSNS (1), /* load_sign_extend. */
1195 COSTS_N_INSNS (1), /* ldrd. */
1196 COSTS_N_INSNS (1), /* ldm_1st. */
1197 1, /* ldm_regs_per_insn_1st. */
1198 2, /* ldm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* loadf. */
1200 COSTS_N_INSNS (1), /* loadd. */
1201 COSTS_N_INSNS (1), /* load_unaligned. */
1202 COSTS_N_INSNS (1), /* store. */
1203 COSTS_N_INSNS (1), /* strd. */
1204 COSTS_N_INSNS (1), /* stm_1st. */
1205 1, /* stm_regs_per_insn_1st. */
1206 2, /* stm_regs_per_insn_subsequent. */
1207 COSTS_N_INSNS (1), /* storef. */
1208 COSTS_N_INSNS (1), /* stored. */
1209 COSTS_N_INSNS (1), /* store_unaligned. */
1210 COSTS_N_INSNS (1), /* loadv. */
1211 COSTS_N_INSNS (1) /* storev. */
1212 },
1213 {
1214 /* FP SFmode */
1215 {
1216 COSTS_N_INSNS (36), /* div. */
1217 COSTS_N_INSNS (11), /* mult. */
1218 COSTS_N_INSNS (20), /* mult_addsub. */
1219 COSTS_N_INSNS (30), /* fma. */
1220 COSTS_N_INSNS (9), /* addsub. */
1221 COSTS_N_INSNS (3), /* fpconst. */
1222 COSTS_N_INSNS (3), /* neg. */
1223 COSTS_N_INSNS (6), /* compare. */
1224 COSTS_N_INSNS (4), /* widen. */
1225 COSTS_N_INSNS (4), /* narrow. */
1226 COSTS_N_INSNS (8), /* toint. */
1227 COSTS_N_INSNS (8), /* fromint. */
1228 COSTS_N_INSNS (8) /* roundint. */
1229 },
1230 /* FP DFmode */
1231 {
1232 COSTS_N_INSNS (64), /* div. */
1233 COSTS_N_INSNS (16), /* mult. */
1234 COSTS_N_INSNS (25), /* mult_addsub. */
1235 COSTS_N_INSNS (30), /* fma. */
1236 COSTS_N_INSNS (9), /* addsub. */
1237 COSTS_N_INSNS (3), /* fpconst. */
1238 COSTS_N_INSNS (3), /* neg. */
1239 COSTS_N_INSNS (6), /* compare. */
1240 COSTS_N_INSNS (6), /* widen. */
1241 COSTS_N_INSNS (6), /* narrow. */
1242 COSTS_N_INSNS (8), /* toint. */
1243 COSTS_N_INSNS (8), /* fromint. */
1244 COSTS_N_INSNS (8) /* roundint. */
1245 }
1246 },
1247 /* Vector */
1248 {
1249 COSTS_N_INSNS (1) /* alu. */
1250 }
1251 };
1252
1253 const struct cpu_cost_table cortexa5_extra_costs =
1254 {
1255 /* ALU */
1256 {
1257 0, /* arith. */
1258 0, /* logical. */
1259 COSTS_N_INSNS (1), /* shift. */
1260 COSTS_N_INSNS (1), /* shift_reg. */
1261 COSTS_N_INSNS (1), /* arith_shift. */
1262 COSTS_N_INSNS (1), /* arith_shift_reg. */
1263 COSTS_N_INSNS (1), /* log_shift. */
1264 COSTS_N_INSNS (1), /* log_shift_reg. */
1265 COSTS_N_INSNS (1), /* extend. */
1266 COSTS_N_INSNS (1), /* extend_arith. */
1267 COSTS_N_INSNS (1), /* bfi. */
1268 COSTS_N_INSNS (1), /* bfx. */
1269 COSTS_N_INSNS (1), /* clz. */
1270 COSTS_N_INSNS (1), /* rev. */
1271 0, /* non_exec. */
1272 true /* non_exec_costs_exec. */
1273 },
1274
1275 {
1276 /* MULT SImode */
1277 {
1278 0, /* simple. */
1279 COSTS_N_INSNS (1), /* flag_setting. */
1280 COSTS_N_INSNS (1), /* extend. */
1281 COSTS_N_INSNS (1), /* add. */
1282 COSTS_N_INSNS (1), /* extend_add. */
1283 COSTS_N_INSNS (7) /* idiv. */
1284 },
1285 /* MULT DImode */
1286 {
1287 0, /* simple (N/A). */
1288 0, /* flag_setting (N/A). */
1289 COSTS_N_INSNS (1), /* extend. */
1290 0, /* add. */
1291 COSTS_N_INSNS (2), /* extend_add. */
1292 0 /* idiv (N/A). */
1293 }
1294 },
1295 /* LD/ST */
1296 {
1297 COSTS_N_INSNS (1), /* load. */
1298 COSTS_N_INSNS (1), /* load_sign_extend. */
1299 COSTS_N_INSNS (6), /* ldrd. */
1300 COSTS_N_INSNS (1), /* ldm_1st. */
1301 1, /* ldm_regs_per_insn_1st. */
1302 2, /* ldm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* loadf. */
1304 COSTS_N_INSNS (4), /* loadd. */
1305 COSTS_N_INSNS (1), /* load_unaligned. */
1306 COSTS_N_INSNS (1), /* store. */
1307 COSTS_N_INSNS (3), /* strd. */
1308 COSTS_N_INSNS (1), /* stm_1st. */
1309 1, /* stm_regs_per_insn_1st. */
1310 2, /* stm_regs_per_insn_subsequent. */
1311 COSTS_N_INSNS (2), /* storef. */
1312 COSTS_N_INSNS (2), /* stored. */
1313 COSTS_N_INSNS (1), /* store_unaligned. */
1314 COSTS_N_INSNS (1), /* loadv. */
1315 COSTS_N_INSNS (1) /* storev. */
1316 },
1317 {
1318 /* FP SFmode */
1319 {
1320 COSTS_N_INSNS (15), /* div. */
1321 COSTS_N_INSNS (3), /* mult. */
1322 COSTS_N_INSNS (7), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1333 },
1334 /* FP DFmode */
1335 {
1336 COSTS_N_INSNS (30), /* div. */
1337 COSTS_N_INSNS (6), /* mult. */
1338 COSTS_N_INSNS (10), /* mult_addsub. */
1339 COSTS_N_INSNS (7), /* fma. */
1340 COSTS_N_INSNS (3), /* addsub. */
1341 COSTS_N_INSNS (3), /* fpconst. */
1342 COSTS_N_INSNS (3), /* neg. */
1343 COSTS_N_INSNS (3), /* compare. */
1344 COSTS_N_INSNS (3), /* widen. */
1345 COSTS_N_INSNS (3), /* narrow. */
1346 COSTS_N_INSNS (3), /* toint. */
1347 COSTS_N_INSNS (3), /* fromint. */
1348 COSTS_N_INSNS (3) /* roundint. */
1349 }
1350 },
1351 /* Vector */
1352 {
1353 COSTS_N_INSNS (1) /* alu. */
1354 }
1355 };
1356
1357
1358 const struct cpu_cost_table cortexa7_extra_costs =
1359 {
1360 /* ALU */
1361 {
1362 0, /* arith. */
1363 0, /* logical. */
1364 COSTS_N_INSNS (1), /* shift. */
1365 COSTS_N_INSNS (1), /* shift_reg. */
1366 COSTS_N_INSNS (1), /* arith_shift. */
1367 COSTS_N_INSNS (1), /* arith_shift_reg. */
1368 COSTS_N_INSNS (1), /* log_shift. */
1369 COSTS_N_INSNS (1), /* log_shift_reg. */
1370 COSTS_N_INSNS (1), /* extend. */
1371 COSTS_N_INSNS (1), /* extend_arith. */
1372 COSTS_N_INSNS (1), /* bfi. */
1373 COSTS_N_INSNS (1), /* bfx. */
1374 COSTS_N_INSNS (1), /* clz. */
1375 COSTS_N_INSNS (1), /* rev. */
1376 0, /* non_exec. */
1377 true /* non_exec_costs_exec. */
1378 },
1379
1380 {
1381 /* MULT SImode */
1382 {
1383 0, /* simple. */
1384 COSTS_N_INSNS (1), /* flag_setting. */
1385 COSTS_N_INSNS (1), /* extend. */
1386 COSTS_N_INSNS (1), /* add. */
1387 COSTS_N_INSNS (1), /* extend_add. */
1388 COSTS_N_INSNS (7) /* idiv. */
1389 },
1390 /* MULT DImode */
1391 {
1392 0, /* simple (N/A). */
1393 0, /* flag_setting (N/A). */
1394 COSTS_N_INSNS (1), /* extend. */
1395 0, /* add. */
1396 COSTS_N_INSNS (2), /* extend_add. */
1397 0 /* idiv (N/A). */
1398 }
1399 },
1400 /* LD/ST */
1401 {
1402 COSTS_N_INSNS (1), /* load. */
1403 COSTS_N_INSNS (1), /* load_sign_extend. */
1404 COSTS_N_INSNS (3), /* ldrd. */
1405 COSTS_N_INSNS (1), /* ldm_1st. */
1406 1, /* ldm_regs_per_insn_1st. */
1407 2, /* ldm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* loadf. */
1409 COSTS_N_INSNS (2), /* loadd. */
1410 COSTS_N_INSNS (1), /* load_unaligned. */
1411 COSTS_N_INSNS (1), /* store. */
1412 COSTS_N_INSNS (3), /* strd. */
1413 COSTS_N_INSNS (1), /* stm_1st. */
1414 1, /* stm_regs_per_insn_1st. */
1415 2, /* stm_regs_per_insn_subsequent. */
1416 COSTS_N_INSNS (2), /* storef. */
1417 COSTS_N_INSNS (2), /* stored. */
1418 COSTS_N_INSNS (1), /* store_unaligned. */
1419 COSTS_N_INSNS (1), /* loadv. */
1420 COSTS_N_INSNS (1) /* storev. */
1421 },
1422 {
1423 /* FP SFmode */
1424 {
1425 COSTS_N_INSNS (15), /* div. */
1426 COSTS_N_INSNS (3), /* mult. */
1427 COSTS_N_INSNS (7), /* mult_addsub. */
1428 COSTS_N_INSNS (7), /* fma. */
1429 COSTS_N_INSNS (3), /* addsub. */
1430 COSTS_N_INSNS (3), /* fpconst. */
1431 COSTS_N_INSNS (3), /* neg. */
1432 COSTS_N_INSNS (3), /* compare. */
1433 COSTS_N_INSNS (3), /* widen. */
1434 COSTS_N_INSNS (3), /* narrow. */
1435 COSTS_N_INSNS (3), /* toint. */
1436 COSTS_N_INSNS (3), /* fromint. */
1437 COSTS_N_INSNS (3) /* roundint. */
1438 },
1439 /* FP DFmode */
1440 {
1441 COSTS_N_INSNS (30), /* div. */
1442 COSTS_N_INSNS (6), /* mult. */
1443 COSTS_N_INSNS (10), /* mult_addsub. */
1444 COSTS_N_INSNS (7), /* fma. */
1445 COSTS_N_INSNS (3), /* addsub. */
1446 COSTS_N_INSNS (3), /* fpconst. */
1447 COSTS_N_INSNS (3), /* neg. */
1448 COSTS_N_INSNS (3), /* compare. */
1449 COSTS_N_INSNS (3), /* widen. */
1450 COSTS_N_INSNS (3), /* narrow. */
1451 COSTS_N_INSNS (3), /* toint. */
1452 COSTS_N_INSNS (3), /* fromint. */
1453 COSTS_N_INSNS (3) /* roundint. */
1454 }
1455 },
1456 /* Vector */
1457 {
1458 COSTS_N_INSNS (1) /* alu. */
1459 }
1460 };
1461
1462 const struct cpu_cost_table cortexa12_extra_costs =
1463 {
1464 /* ALU */
1465 {
1466 0, /* arith. */
1467 0, /* logical. */
1468 0, /* shift. */
1469 COSTS_N_INSNS (1), /* shift_reg. */
1470 COSTS_N_INSNS (1), /* arith_shift. */
1471 COSTS_N_INSNS (1), /* arith_shift_reg. */
1472 COSTS_N_INSNS (1), /* log_shift. */
1473 COSTS_N_INSNS (1), /* log_shift_reg. */
1474 0, /* extend. */
1475 COSTS_N_INSNS (1), /* extend_arith. */
1476 0, /* bfi. */
1477 COSTS_N_INSNS (1), /* bfx. */
1478 COSTS_N_INSNS (1), /* clz. */
1479 COSTS_N_INSNS (1), /* rev. */
1480 0, /* non_exec. */
1481 true /* non_exec_costs_exec. */
1482 },
1483 /* MULT SImode */
1484 {
1485 {
1486 COSTS_N_INSNS (2), /* simple. */
1487 COSTS_N_INSNS (3), /* flag_setting. */
1488 COSTS_N_INSNS (2), /* extend. */
1489 COSTS_N_INSNS (3), /* add. */
1490 COSTS_N_INSNS (2), /* extend_add. */
1491 COSTS_N_INSNS (18) /* idiv. */
1492 },
1493 /* MULT DImode */
1494 {
1495 0, /* simple (N/A). */
1496 0, /* flag_setting (N/A). */
1497 COSTS_N_INSNS (3), /* extend. */
1498 0, /* add (N/A). */
1499 COSTS_N_INSNS (3), /* extend_add. */
1500 0 /* idiv (N/A). */
1501 }
1502 },
1503 /* LD/ST */
1504 {
1505 COSTS_N_INSNS (3), /* load. */
1506 COSTS_N_INSNS (3), /* load_sign_extend. */
1507 COSTS_N_INSNS (3), /* ldrd. */
1508 COSTS_N_INSNS (3), /* ldm_1st. */
1509 1, /* ldm_regs_per_insn_1st. */
1510 2, /* ldm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (3), /* loadf. */
1512 COSTS_N_INSNS (3), /* loadd. */
1513 0, /* load_unaligned. */
1514 0, /* store. */
1515 0, /* strd. */
1516 0, /* stm_1st. */
1517 1, /* stm_regs_per_insn_1st. */
1518 2, /* stm_regs_per_insn_subsequent. */
1519 COSTS_N_INSNS (2), /* storef. */
1520 COSTS_N_INSNS (2), /* stored. */
1521 0, /* store_unaligned. */
1522 COSTS_N_INSNS (1), /* loadv. */
1523 COSTS_N_INSNS (1) /* storev. */
1524 },
1525 {
1526 /* FP SFmode */
1527 {
1528 COSTS_N_INSNS (17), /* div. */
1529 COSTS_N_INSNS (4), /* mult. */
1530 COSTS_N_INSNS (8), /* mult_addsub. */
1531 COSTS_N_INSNS (8), /* fma. */
1532 COSTS_N_INSNS (4), /* addsub. */
1533 COSTS_N_INSNS (2), /* fpconst. */
1534 COSTS_N_INSNS (2), /* neg. */
1535 COSTS_N_INSNS (2), /* compare. */
1536 COSTS_N_INSNS (4), /* widen. */
1537 COSTS_N_INSNS (4), /* narrow. */
1538 COSTS_N_INSNS (4), /* toint. */
1539 COSTS_N_INSNS (4), /* fromint. */
1540 COSTS_N_INSNS (4) /* roundint. */
1541 },
1542 /* FP DFmode */
1543 {
1544 COSTS_N_INSNS (31), /* div. */
1545 COSTS_N_INSNS (4), /* mult. */
1546 COSTS_N_INSNS (8), /* mult_addsub. */
1547 COSTS_N_INSNS (8), /* fma. */
1548 COSTS_N_INSNS (4), /* addsub. */
1549 COSTS_N_INSNS (2), /* fpconst. */
1550 COSTS_N_INSNS (2), /* neg. */
1551 COSTS_N_INSNS (2), /* compare. */
1552 COSTS_N_INSNS (4), /* widen. */
1553 COSTS_N_INSNS (4), /* narrow. */
1554 COSTS_N_INSNS (4), /* toint. */
1555 COSTS_N_INSNS (4), /* fromint. */
1556 COSTS_N_INSNS (4) /* roundint. */
1557 }
1558 },
1559 /* Vector */
1560 {
1561 COSTS_N_INSNS (1) /* alu. */
1562 }
1563 };
1564
1565 const struct cpu_cost_table cortexa15_extra_costs =
1566 {
1567 /* ALU */
1568 {
1569 0, /* arith. */
1570 0, /* logical. */
1571 0, /* shift. */
1572 0, /* shift_reg. */
1573 COSTS_N_INSNS (1), /* arith_shift. */
1574 COSTS_N_INSNS (1), /* arith_shift_reg. */
1575 COSTS_N_INSNS (1), /* log_shift. */
1576 COSTS_N_INSNS (1), /* log_shift_reg. */
1577 0, /* extend. */
1578 COSTS_N_INSNS (1), /* extend_arith. */
1579 COSTS_N_INSNS (1), /* bfi. */
1580 0, /* bfx. */
1581 0, /* clz. */
1582 0, /* rev. */
1583 0, /* non_exec. */
1584 true /* non_exec_costs_exec. */
1585 },
1586 /* MULT SImode */
1587 {
1588 {
1589 COSTS_N_INSNS (2), /* simple. */
1590 COSTS_N_INSNS (3), /* flag_setting. */
1591 COSTS_N_INSNS (2), /* extend. */
1592 COSTS_N_INSNS (2), /* add. */
1593 COSTS_N_INSNS (2), /* extend_add. */
1594 COSTS_N_INSNS (18) /* idiv. */
1595 },
1596 /* MULT DImode */
1597 {
1598 0, /* simple (N/A). */
1599 0, /* flag_setting (N/A). */
1600 COSTS_N_INSNS (3), /* extend. */
1601 0, /* add (N/A). */
1602 COSTS_N_INSNS (3), /* extend_add. */
1603 0 /* idiv (N/A). */
1604 }
1605 },
1606 /* LD/ST */
1607 {
1608 COSTS_N_INSNS (3), /* load. */
1609 COSTS_N_INSNS (3), /* load_sign_extend. */
1610 COSTS_N_INSNS (3), /* ldrd. */
1611 COSTS_N_INSNS (4), /* ldm_1st. */
1612 1, /* ldm_regs_per_insn_1st. */
1613 2, /* ldm_regs_per_insn_subsequent. */
1614 COSTS_N_INSNS (4), /* loadf. */
1615 COSTS_N_INSNS (4), /* loadd. */
1616 0, /* load_unaligned. */
1617 0, /* store. */
1618 0, /* strd. */
1619 COSTS_N_INSNS (1), /* stm_1st. */
1620 1, /* stm_regs_per_insn_1st. */
1621 2, /* stm_regs_per_insn_subsequent. */
1622 0, /* storef. */
1623 0, /* stored. */
1624 0, /* store_unaligned. */
1625 COSTS_N_INSNS (1), /* loadv. */
1626 COSTS_N_INSNS (1) /* storev. */
1627 },
1628 {
1629 /* FP SFmode */
1630 {
1631 COSTS_N_INSNS (17), /* div. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (8), /* mult_addsub. */
1634 COSTS_N_INSNS (8), /* fma. */
1635 COSTS_N_INSNS (4), /* addsub. */
1636 COSTS_N_INSNS (2), /* fpconst. */
1637 COSTS_N_INSNS (2), /* neg. */
1638 COSTS_N_INSNS (5), /* compare. */
1639 COSTS_N_INSNS (4), /* widen. */
1640 COSTS_N_INSNS (4), /* narrow. */
1641 COSTS_N_INSNS (4), /* toint. */
1642 COSTS_N_INSNS (4), /* fromint. */
1643 COSTS_N_INSNS (4) /* roundint. */
1644 },
1645 /* FP DFmode */
1646 {
1647 COSTS_N_INSNS (31), /* div. */
1648 COSTS_N_INSNS (4), /* mult. */
1649 COSTS_N_INSNS (8), /* mult_addsub. */
1650 COSTS_N_INSNS (8), /* fma. */
1651 COSTS_N_INSNS (4), /* addsub. */
1652 COSTS_N_INSNS (2), /* fpconst. */
1653 COSTS_N_INSNS (2), /* neg. */
1654 COSTS_N_INSNS (2), /* compare. */
1655 COSTS_N_INSNS (4), /* widen. */
1656 COSTS_N_INSNS (4), /* narrow. */
1657 COSTS_N_INSNS (4), /* toint. */
1658 COSTS_N_INSNS (4), /* fromint. */
1659 COSTS_N_INSNS (4) /* roundint. */
1660 }
1661 },
1662 /* Vector */
1663 {
1664 COSTS_N_INSNS (1) /* alu. */
1665 }
1666 };
1667
1668 const struct cpu_cost_table v7m_extra_costs =
1669 {
1670 /* ALU */
1671 {
1672 0, /* arith. */
1673 0, /* logical. */
1674 0, /* shift. */
1675 0, /* shift_reg. */
1676 0, /* arith_shift. */
1677 COSTS_N_INSNS (1), /* arith_shift_reg. */
1678 0, /* log_shift. */
1679 COSTS_N_INSNS (1), /* log_shift_reg. */
1680 0, /* extend. */
1681 COSTS_N_INSNS (1), /* extend_arith. */
1682 0, /* bfi. */
1683 0, /* bfx. */
1684 0, /* clz. */
1685 0, /* rev. */
1686 COSTS_N_INSNS (1), /* non_exec. */
1687 false /* non_exec_costs_exec. */
1688 },
1689 {
1690 /* MULT SImode */
1691 {
1692 COSTS_N_INSNS (1), /* simple. */
1693 COSTS_N_INSNS (1), /* flag_setting. */
1694 COSTS_N_INSNS (2), /* extend. */
1695 COSTS_N_INSNS (1), /* add. */
1696 COSTS_N_INSNS (3), /* extend_add. */
1697 COSTS_N_INSNS (8) /* idiv. */
1698 },
1699 /* MULT DImode */
1700 {
1701 0, /* simple (N/A). */
1702 0, /* flag_setting (N/A). */
1703 COSTS_N_INSNS (2), /* extend. */
1704 0, /* add (N/A). */
1705 COSTS_N_INSNS (3), /* extend_add. */
1706 0 /* idiv (N/A). */
1707 }
1708 },
1709 /* LD/ST */
1710 {
1711 COSTS_N_INSNS (2), /* load. */
1712 0, /* load_sign_extend. */
1713 COSTS_N_INSNS (3), /* ldrd. */
1714 COSTS_N_INSNS (2), /* ldm_1st. */
1715 1, /* ldm_regs_per_insn_1st. */
1716 1, /* ldm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* loadf. */
1718 COSTS_N_INSNS (3), /* loadd. */
1719 COSTS_N_INSNS (1), /* load_unaligned. */
1720 COSTS_N_INSNS (2), /* store. */
1721 COSTS_N_INSNS (3), /* strd. */
1722 COSTS_N_INSNS (2), /* stm_1st. */
1723 1, /* stm_regs_per_insn_1st. */
1724 1, /* stm_regs_per_insn_subsequent. */
1725 COSTS_N_INSNS (2), /* storef. */
1726 COSTS_N_INSNS (3), /* stored. */
1727 COSTS_N_INSNS (1), /* store_unaligned. */
1728 COSTS_N_INSNS (1), /* loadv. */
1729 COSTS_N_INSNS (1) /* storev. */
1730 },
1731 {
1732 /* FP SFmode */
1733 {
1734 COSTS_N_INSNS (7), /* div. */
1735 COSTS_N_INSNS (2), /* mult. */
1736 COSTS_N_INSNS (5), /* mult_addsub. */
1737 COSTS_N_INSNS (3), /* fma. */
1738 COSTS_N_INSNS (1), /* addsub. */
1739 0, /* fpconst. */
1740 0, /* neg. */
1741 0, /* compare. */
1742 0, /* widen. */
1743 0, /* narrow. */
1744 0, /* toint. */
1745 0, /* fromint. */
1746 0 /* roundint. */
1747 },
1748 /* FP DFmode */
1749 {
1750 COSTS_N_INSNS (15), /* div. */
1751 COSTS_N_INSNS (5), /* mult. */
1752 COSTS_N_INSNS (7), /* mult_addsub. */
1753 COSTS_N_INSNS (7), /* fma. */
1754 COSTS_N_INSNS (3), /* addsub. */
1755 0, /* fpconst. */
1756 0, /* neg. */
1757 0, /* compare. */
1758 0, /* widen. */
1759 0, /* narrow. */
1760 0, /* toint. */
1761 0, /* fromint. */
1762 0 /* roundint. */
1763 }
1764 },
1765 /* Vector */
1766 {
1767 COSTS_N_INSNS (1) /* alu. */
1768 }
1769 };
1770
1771 const struct addr_mode_cost_table generic_addr_mode_costs =
1772 {
1773 /* int. */
1774 {
1775 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1776 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1777 COSTS_N_INSNS (0) /* AMO_WB. */
1778 },
1779 /* float. */
1780 {
1781 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1782 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1783 COSTS_N_INSNS (0) /* AMO_WB. */
1784 },
1785 /* vector. */
1786 {
1787 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1788 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1789 COSTS_N_INSNS (0) /* AMO_WB. */
1790 }
1791 };
1792
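/* Illustrative note (not part of the original source): the entries in the
   cost tables above are *extra* costs, expressed via COSTS_N_INSNS, that the
   rtx-cost hooks add on top of a baseline instruction cost.  Reading the
   cortexa15 table above, for example, a simple SImode multiply carries an
   extra COSTS_N_INSNS (2), while a plain ALU "arith" operation carries 0,
   i.e. no penalty beyond the baseline.  */
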
1793 const struct tune_params arm_slowmul_tune =
1794 {
1795 &generic_extra_costs, /* Insn extra costs. */
1796 &generic_addr_mode_costs, /* Addressing mode costs. */
1797 NULL, /* Sched adj cost. */
1798 arm_default_branch_cost,
1799 &arm_default_vec_cost,
1800 3, /* Constant limit. */
1801 5, /* Max cond insns. */
1802 8, /* Memset max inline. */
1803 1, /* Issue rate. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 tune_params::PREF_CONST_POOL_TRUE,
1806 tune_params::PREF_LDRD_FALSE,
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1809 tune_params::DISPARAGE_FLAGS_NEITHER,
1810 tune_params::PREF_NEON_64_FALSE,
1811 tune_params::PREF_NEON_STRINGOPS_FALSE,
1812 tune_params::FUSE_NOTHING,
1813 tune_params::SCHED_AUTOPREF_OFF
1814 };
1815
1816 const struct tune_params arm_fastmul_tune =
1817 {
1818 &generic_extra_costs, /* Insn extra costs. */
1819 &generic_addr_mode_costs, /* Addressing mode costs. */
1820 NULL, /* Sched adj cost. */
1821 arm_default_branch_cost,
1822 &arm_default_vec_cost,
1823 1, /* Constant limit. */
1824 5, /* Max cond insns. */
1825 8, /* Memset max inline. */
1826 1, /* Issue rate. */
1827 ARM_PREFETCH_NOT_BENEFICIAL,
1828 tune_params::PREF_CONST_POOL_TRUE,
1829 tune_params::PREF_LDRD_FALSE,
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1831 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1832 tune_params::DISPARAGE_FLAGS_NEITHER,
1833 tune_params::PREF_NEON_64_FALSE,
1834 tune_params::PREF_NEON_STRINGOPS_FALSE,
1835 tune_params::FUSE_NOTHING,
1836 tune_params::SCHED_AUTOPREF_OFF
1837 };
1838
1839 /* StrongARM has early execution of branches, so a sequence that is worth
1840 skipping is shorter. Set max_insns_skipped to a lower value. */
1841
1842 const struct tune_params arm_strongarm_tune =
1843 {
1844 &generic_extra_costs, /* Insn extra costs. */
1845 &generic_addr_mode_costs, /* Addressing mode costs. */
1846 NULL, /* Sched adj cost. */
1847 arm_default_branch_cost,
1848 &arm_default_vec_cost,
1849 1, /* Constant limit. */
1850 3, /* Max cond insns. */
1851 8, /* Memset max inline. */
1852 1, /* Issue rate. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 tune_params::PREF_CONST_POOL_TRUE,
1855 tune_params::PREF_LDRD_FALSE,
1856 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1857 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1858 tune_params::DISPARAGE_FLAGS_NEITHER,
1859 tune_params::PREF_NEON_64_FALSE,
1860 tune_params::PREF_NEON_STRINGOPS_FALSE,
1861 tune_params::FUSE_NOTHING,
1862 tune_params::SCHED_AUTOPREF_OFF
1863 };
1864
1865 const struct tune_params arm_xscale_tune =
1866 {
1867 &generic_extra_costs, /* Insn extra costs. */
1868 &generic_addr_mode_costs, /* Addressing mode costs. */
1869 xscale_sched_adjust_cost,
1870 arm_default_branch_cost,
1871 &arm_default_vec_cost,
1872 2, /* Constant limit. */
1873 3, /* Max cond insns. */
1874 8, /* Memset max inline. */
1875 1, /* Issue rate. */
1876 ARM_PREFETCH_NOT_BENEFICIAL,
1877 tune_params::PREF_CONST_POOL_TRUE,
1878 tune_params::PREF_LDRD_FALSE,
1879 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1881 tune_params::DISPARAGE_FLAGS_NEITHER,
1882 tune_params::PREF_NEON_64_FALSE,
1883 tune_params::PREF_NEON_STRINGOPS_FALSE,
1884 tune_params::FUSE_NOTHING,
1885 tune_params::SCHED_AUTOPREF_OFF
1886 };
1887
1888 const struct tune_params arm_9e_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911 const struct tune_params arm_marvell_pj4_tune =
1912 {
1913 &generic_extra_costs, /* Insn extra costs. */
1914 &generic_addr_mode_costs, /* Addressing mode costs. */
1915 NULL, /* Sched adj cost. */
1916 arm_default_branch_cost,
1917 &arm_default_vec_cost,
1918 1, /* Constant limit. */
1919 5, /* Max cond insns. */
1920 8, /* Memset max inline. */
1921 2, /* Issue rate. */
1922 ARM_PREFETCH_NOT_BENEFICIAL,
1923 tune_params::PREF_CONST_POOL_TRUE,
1924 tune_params::PREF_LDRD_FALSE,
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1927 tune_params::DISPARAGE_FLAGS_NEITHER,
1928 tune_params::PREF_NEON_64_FALSE,
1929 tune_params::PREF_NEON_STRINGOPS_FALSE,
1930 tune_params::FUSE_NOTHING,
1931 tune_params::SCHED_AUTOPREF_OFF
1932 };
1933
1934 const struct tune_params arm_v6t2_tune =
1935 {
1936 &generic_extra_costs, /* Insn extra costs. */
1937 &generic_addr_mode_costs, /* Addressing mode costs. */
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 1, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957
1958 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1959 const struct tune_params arm_cortex_tune =
1960 {
1961 &generic_extra_costs,
1962 &generic_addr_mode_costs, /* Addressing mode costs. */
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 2, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_FALSE,
1972 tune_params::PREF_LDRD_FALSE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER,
1976 tune_params::PREF_NEON_64_FALSE,
1977 tune_params::PREF_NEON_STRINGOPS_FALSE,
1978 tune_params::FUSE_NOTHING,
1979 tune_params::SCHED_AUTOPREF_OFF
1980 };
1981
1982 const struct tune_params arm_cortex_a8_tune =
1983 {
1984 &cortexa8_extra_costs,
1985 &generic_addr_mode_costs, /* Addressing mode costs. */
1986 NULL, /* Sched adj cost. */
1987 arm_default_branch_cost,
1988 &arm_default_vec_cost,
1989 1, /* Constant limit. */
1990 5, /* Max cond insns. */
1991 8, /* Memset max inline. */
1992 2, /* Issue rate. */
1993 ARM_PREFETCH_NOT_BENEFICIAL,
1994 tune_params::PREF_CONST_POOL_FALSE,
1995 tune_params::PREF_LDRD_FALSE,
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1998 tune_params::DISPARAGE_FLAGS_NEITHER,
1999 tune_params::PREF_NEON_64_FALSE,
2000 tune_params::PREF_NEON_STRINGOPS_TRUE,
2001 tune_params::FUSE_NOTHING,
2002 tune_params::SCHED_AUTOPREF_OFF
2003 };
2004
2005 const struct tune_params arm_cortex_a7_tune =
2006 {
2007 &cortexa7_extra_costs,
2008 &generic_addr_mode_costs, /* Addressing mode costs. */
2009 NULL, /* Sched adj cost. */
2010 arm_default_branch_cost,
2011 &arm_default_vec_cost,
2012 1, /* Constant limit. */
2013 5, /* Max cond insns. */
2014 8, /* Memset max inline. */
2015 2, /* Issue rate. */
2016 ARM_PREFETCH_NOT_BENEFICIAL,
2017 tune_params::PREF_CONST_POOL_FALSE,
2018 tune_params::PREF_LDRD_FALSE,
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2020 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2021 tune_params::DISPARAGE_FLAGS_NEITHER,
2022 tune_params::PREF_NEON_64_FALSE,
2023 tune_params::PREF_NEON_STRINGOPS_TRUE,
2024 tune_params::FUSE_NOTHING,
2025 tune_params::SCHED_AUTOPREF_OFF
2026 };
2027
2028 const struct tune_params arm_cortex_a15_tune =
2029 {
2030 &cortexa15_extra_costs,
2031 &generic_addr_mode_costs, /* Addressing mode costs. */
2032 NULL, /* Sched adj cost. */
2033 arm_default_branch_cost,
2034 &arm_default_vec_cost,
2035 1, /* Constant limit. */
2036 2, /* Max cond insns. */
2037 8, /* Memset max inline. */
2038 3, /* Issue rate. */
2039 ARM_PREFETCH_NOT_BENEFICIAL,
2040 tune_params::PREF_CONST_POOL_FALSE,
2041 tune_params::PREF_LDRD_TRUE,
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2043 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2044 tune_params::DISPARAGE_FLAGS_ALL,
2045 tune_params::PREF_NEON_64_FALSE,
2046 tune_params::PREF_NEON_STRINGOPS_TRUE,
2047 tune_params::FUSE_NOTHING,
2048 tune_params::SCHED_AUTOPREF_FULL
2049 };
2050
2051 const struct tune_params arm_cortex_a35_tune =
2052 {
2053 &cortexa53_extra_costs,
2054 &generic_addr_mode_costs, /* Addressing mode costs. */
2055 NULL, /* Sched adj cost. */
2056 arm_default_branch_cost,
2057 &arm_default_vec_cost,
2058 1, /* Constant limit. */
2059 5, /* Max cond insns. */
2060 8, /* Memset max inline. */
2061 1, /* Issue rate. */
2062 ARM_PREFETCH_NOT_BENEFICIAL,
2063 tune_params::PREF_CONST_POOL_FALSE,
2064 tune_params::PREF_LDRD_FALSE,
2065 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2066 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2067 tune_params::DISPARAGE_FLAGS_NEITHER,
2068 tune_params::PREF_NEON_64_FALSE,
2069 tune_params::PREF_NEON_STRINGOPS_TRUE,
2070 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2071 tune_params::SCHED_AUTOPREF_OFF
2072 };
2073
2074 const struct tune_params arm_cortex_a53_tune =
2075 {
2076 &cortexa53_extra_costs,
2077 &generic_addr_mode_costs, /* Addressing mode costs. */
2078 NULL, /* Sched adj cost. */
2079 arm_default_branch_cost,
2080 &arm_default_vec_cost,
2081 1, /* Constant limit. */
2082 5, /* Max cond insns. */
2083 8, /* Memset max inline. */
2084 2, /* Issue rate. */
2085 ARM_PREFETCH_NOT_BENEFICIAL,
2086 tune_params::PREF_CONST_POOL_FALSE,
2087 tune_params::PREF_LDRD_FALSE,
2088 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2090 tune_params::DISPARAGE_FLAGS_NEITHER,
2091 tune_params::PREF_NEON_64_FALSE,
2092 tune_params::PREF_NEON_STRINGOPS_TRUE,
2093 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2094 tune_params::SCHED_AUTOPREF_OFF
2095 };
2096
2097 const struct tune_params arm_cortex_a57_tune =
2098 {
2099 &cortexa57_extra_costs,
2100   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 8, /* Memset max inline. */
2107 3, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_64_FALSE,
2115 tune_params::PREF_NEON_STRINGOPS_TRUE,
2116 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2117 tune_params::SCHED_AUTOPREF_FULL
2118 };
2119
2120 const struct tune_params arm_exynosm1_tune =
2121 {
2122 &exynosm1_extra_costs,
2123 &generic_addr_mode_costs, /* Addressing mode costs. */
2124 NULL, /* Sched adj cost. */
2125 arm_default_branch_cost,
2126 &arm_default_vec_cost,
2127 1, /* Constant limit. */
2128 2, /* Max cond insns. */
2129 8, /* Memset max inline. */
2130 3, /* Issue rate. */
2131 ARM_PREFETCH_NOT_BENEFICIAL,
2132 tune_params::PREF_CONST_POOL_FALSE,
2133 tune_params::PREF_LDRD_TRUE,
2134 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2136 tune_params::DISPARAGE_FLAGS_ALL,
2137 tune_params::PREF_NEON_64_FALSE,
2138 tune_params::PREF_NEON_STRINGOPS_TRUE,
2139 tune_params::FUSE_NOTHING,
2140 tune_params::SCHED_AUTOPREF_OFF
2141 };
2142
2143 const struct tune_params arm_xgene1_tune =
2144 {
2145 &xgene1_extra_costs,
2146 &generic_addr_mode_costs, /* Addressing mode costs. */
2147 NULL, /* Sched adj cost. */
2148 arm_default_branch_cost,
2149 &arm_default_vec_cost,
2150 1, /* Constant limit. */
2151 2, /* Max cond insns. */
2152 32, /* Memset max inline. */
2153 4, /* Issue rate. */
2154 ARM_PREFETCH_NOT_BENEFICIAL,
2155 tune_params::PREF_CONST_POOL_FALSE,
2156 tune_params::PREF_LDRD_TRUE,
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2159 tune_params::DISPARAGE_FLAGS_ALL,
2160 tune_params::PREF_NEON_64_FALSE,
2161 tune_params::PREF_NEON_STRINGOPS_FALSE,
2162 tune_params::FUSE_NOTHING,
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2167 less appealing. Set max_insns_skipped to a low value. */
2168
2169 const struct tune_params arm_cortex_a5_tune =
2170 {
2171 &cortexa5_extra_costs,
2172 &generic_addr_mode_costs, /* Addressing mode costs. */
2173 NULL, /* Sched adj cost. */
2174 arm_cortex_a5_branch_cost,
2175 &arm_default_vec_cost,
2176 1, /* Constant limit. */
2177 1, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_FALSE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_NEITHER,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 tune_params::FUSE_NOTHING,
2189 tune_params::SCHED_AUTOPREF_OFF
2190 };
2191
2192 const struct tune_params arm_cortex_a9_tune =
2193 {
2194 &cortexa9_extra_costs,
2195 &generic_addr_mode_costs, /* Addressing mode costs. */
2196 cortex_a9_sched_adjust_cost,
2197 arm_default_branch_cost,
2198 &arm_default_vec_cost,
2199 1, /* Constant limit. */
2200 5, /* Max cond insns. */
2201 8, /* Memset max inline. */
2202 2, /* Issue rate. */
2203 ARM_PREFETCH_BENEFICIAL(4,32,32),
2204 tune_params::PREF_CONST_POOL_FALSE,
2205 tune_params::PREF_LDRD_FALSE,
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2207 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2208 tune_params::DISPARAGE_FLAGS_NEITHER,
2209 tune_params::PREF_NEON_64_FALSE,
2210 tune_params::PREF_NEON_STRINGOPS_FALSE,
2211 tune_params::FUSE_NOTHING,
2212 tune_params::SCHED_AUTOPREF_OFF
2213 };
2214
2215 const struct tune_params arm_cortex_a12_tune =
2216 {
2217 &cortexa12_extra_costs,
2218 &generic_addr_mode_costs, /* Addressing mode costs. */
2219 NULL, /* Sched adj cost. */
2220 arm_default_branch_cost,
2221 &arm_default_vec_cost, /* Vectorizer costs. */
2222 1, /* Constant limit. */
2223 2, /* Max cond insns. */
2224 8, /* Memset max inline. */
2225 2, /* Issue rate. */
2226 ARM_PREFETCH_NOT_BENEFICIAL,
2227 tune_params::PREF_CONST_POOL_FALSE,
2228 tune_params::PREF_LDRD_TRUE,
2229 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2230 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2231 tune_params::DISPARAGE_FLAGS_ALL,
2232 tune_params::PREF_NEON_64_FALSE,
2233 tune_params::PREF_NEON_STRINGOPS_TRUE,
2234 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2235 tune_params::SCHED_AUTOPREF_OFF
2236 };
2237
2238 const struct tune_params arm_cortex_a73_tune =
2239 {
2240 &cortexa57_extra_costs,
2241 &generic_addr_mode_costs, /* Addressing mode costs. */
2242 NULL, /* Sched adj cost. */
2243 arm_default_branch_cost,
2244 &arm_default_vec_cost, /* Vectorizer costs. */
2245 1, /* Constant limit. */
2246 2, /* Max cond insns. */
2247 8, /* Memset max inline. */
2248 2, /* Issue rate. */
2249 ARM_PREFETCH_NOT_BENEFICIAL,
2250 tune_params::PREF_CONST_POOL_FALSE,
2251 tune_params::PREF_LDRD_TRUE,
2252 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2253 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2254 tune_params::DISPARAGE_FLAGS_ALL,
2255 tune_params::PREF_NEON_64_FALSE,
2256 tune_params::PREF_NEON_STRINGOPS_TRUE,
2257 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2258 tune_params::SCHED_AUTOPREF_FULL
2259 };
2260
2261 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a
2262    single cycle to execute, so materialising a constant that way costs two
2263    cycles.  An LDR from the constant pool likewise takes two cycles, but
2264    mildly increases pipelining opportunity (consecutive loads/stores can be
2265    pipelined together, saving one cycle), and may also improve icache
2266    utilisation.  Hence we prefer the constant pool for such processors.  */
2267
2268 const struct tune_params arm_v7m_tune =
2269 {
2270 &v7m_extra_costs,
2271 &generic_addr_mode_costs, /* Addressing mode costs. */
2272 NULL, /* Sched adj cost. */
2273 arm_cortex_m_branch_cost,
2274 &arm_default_vec_cost,
2275 1, /* Constant limit. */
2276 2, /* Max cond insns. */
2277 8, /* Memset max inline. */
2278 1, /* Issue rate. */
2279 ARM_PREFETCH_NOT_BENEFICIAL,
2280 tune_params::PREF_CONST_POOL_TRUE,
2281 tune_params::PREF_LDRD_FALSE,
2282 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2284 tune_params::DISPARAGE_FLAGS_NEITHER,
2285 tune_params::PREF_NEON_64_FALSE,
2286 tune_params::PREF_NEON_STRINGOPS_FALSE,
2287 tune_params::FUSE_NOTHING,
2288 tune_params::SCHED_AUTOPREF_OFF
2289 };
2290
2291 /* Cortex-M7 tuning. */
2292
2293 const struct tune_params arm_cortex_m7_tune =
2294 {
2295 &v7m_extra_costs,
2296 &generic_addr_mode_costs, /* Addressing mode costs. */
2297 NULL, /* Sched adj cost. */
2298 arm_cortex_m7_branch_cost,
2299 &arm_default_vec_cost,
2300 0, /* Constant limit. */
2301 1, /* Max cond insns. */
2302 8, /* Memset max inline. */
2303 2, /* Issue rate. */
2304 ARM_PREFETCH_NOT_BENEFICIAL,
2305 tune_params::PREF_CONST_POOL_TRUE,
2306 tune_params::PREF_LDRD_FALSE,
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2309 tune_params::DISPARAGE_FLAGS_NEITHER,
2310 tune_params::PREF_NEON_64_FALSE,
2311 tune_params::PREF_NEON_STRINGOPS_FALSE,
2312 tune_params::FUSE_NOTHING,
2313 tune_params::SCHED_AUTOPREF_OFF
2314 };
2315
2316 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2317 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2318 cortex-m23. */
2319 const struct tune_params arm_v6m_tune =
2320 {
2321 &generic_extra_costs, /* Insn extra costs. */
2322 &generic_addr_mode_costs, /* Addressing mode costs. */
2323 NULL, /* Sched adj cost. */
2324 arm_default_branch_cost,
2325 &arm_default_vec_cost, /* Vectorizer costs. */
2326 1, /* Constant limit. */
2327 5, /* Max cond insns. */
2328 8, /* Memset max inline. */
2329 1, /* Issue rate. */
2330 ARM_PREFETCH_NOT_BENEFICIAL,
2331 tune_params::PREF_CONST_POOL_FALSE,
2332 tune_params::PREF_LDRD_FALSE,
2333 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2334 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2335 tune_params::DISPARAGE_FLAGS_NEITHER,
2336 tune_params::PREF_NEON_64_FALSE,
2337 tune_params::PREF_NEON_STRINGOPS_FALSE,
2338 tune_params::FUSE_NOTHING,
2339 tune_params::SCHED_AUTOPREF_OFF
2340 };
2341
2342 const struct tune_params arm_fa726te_tune =
2343 {
2344 &generic_extra_costs, /* Insn extra costs. */
2345 &generic_addr_mode_costs, /* Addressing mode costs. */
2346 fa726te_sched_adjust_cost,
2347 arm_default_branch_cost,
2348 &arm_default_vec_cost,
2349 1, /* Constant limit. */
2350 5, /* Max cond insns. */
2351 8, /* Memset max inline. */
2352 2, /* Issue rate. */
2353 ARM_PREFETCH_NOT_BENEFICIAL,
2354 tune_params::PREF_CONST_POOL_TRUE,
2355 tune_params::PREF_LDRD_FALSE,
2356 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2358 tune_params::DISPARAGE_FLAGS_NEITHER,
2359 tune_params::PREF_NEON_64_FALSE,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Auto-generated CPU, FPU and architecture tables. */
2366 #include "arm-cpu-data.h"
2367
2368 /* The name of the preprocessor macro to define for this architecture. PROFILE
2369    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2370 is thus chosen to be big enough to hold the longest architecture name. */
2371
2372 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2373
2374 /* Supported TLS relocations. */
2375
2376 enum tls_reloc {
2377 TLS_GD32,
2378 TLS_LDM32,
2379 TLS_LDO32,
2380 TLS_IE32,
2381 TLS_LE32,
2382 TLS_DESCSEQ /* GNU scheme */
2383 };
2384
2385 /* The maximum number of insns to be used when loading a constant. */
2386 inline static int
2387 arm_constant_limit (bool size_p)
2388 {
2389 return size_p ? 1 : current_tune->constant_limit;
2390 }
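
/* Illustrative note (not part of the original source): with SIZE_P true the
   limit is always a single instruction; otherwise it comes from the active
   tuning table, e.g. arm_slowmul_tune above sets constant_limit to 3 while
   most of the Cortex tunings use 1.  A minimal sketch of a call site:

     int max_insns = arm_constant_limit (optimize_size != 0);  */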
2391
2392 /* Emit an insn that's a simple single-set. Both the operands must be known
2393 to be valid. */
2394 inline static rtx_insn *
2395 emit_set_insn (rtx x, rtx y)
2396 {
2397 return emit_insn (gen_rtx_SET (x, y));
2398 }
2399
2400 /* Return the number of bits set in VALUE. */
2401 static unsigned
2402 bit_count (unsigned long value)
2403 {
2404 unsigned long count = 0;
2405
2406 while (value)
2407 {
2408 count++;
2409 value &= value - 1; /* Clear the least-significant set bit. */
2410 }
2411
2412 return count;
2413 }
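
/* Worked example (illustrative, not part of the original source):
   bit_count (0x28) iterates twice, since 0x28 & 0x27 == 0x20 and
   0x20 & 0x1f == 0, and so returns 2.  Each "value &= value - 1" step
   clears exactly one set bit (Kernighan's trick), so the loop runs once
   per set bit rather than once per bit position.  */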
2414
2415 /* Return the number of bits set in BMAP. */
2416 static unsigned
2417 bitmap_popcount (const sbitmap bmap)
2418 {
2419 unsigned int count = 0;
2420 unsigned int n = 0;
2421 sbitmap_iterator sbi;
2422
2423 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2424 count++;
2425 return count;
2426 }
2427
2428 typedef struct
2429 {
2430 machine_mode mode;
2431 const char *name;
2432 } arm_fixed_mode_set;
2433
2434 /* A small helper for setting fixed-point optab libfuncs.  */
2435
2436 static void
2437 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2438 const char *funcname, const char *modename,
2439 int num_suffix)
2440 {
2441 char buffer[50];
2442
2443 if (num_suffix == 0)
2444 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2445 else
2446 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2447
2448 set_optab_libfunc (optable, mode, buffer);
2449 }
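
/* Example (illustrative, not part of the original source): the call
   arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3)
   made from arm_init_libfuncs below registers the libcall name
   "__gnu_ssaddsa3", i.e. "__gnu_" + funcname + modename + num_suffix;
   with num_suffix == 0 the trailing digit is omitted.  */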
2450
2451 static void
2452 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2453 machine_mode from, const char *funcname,
2454 const char *toname, const char *fromname)
2455 {
2456 char buffer[50];
2457 const char *maybe_suffix_2 = "";
2458
2459 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2460 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2461 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2462 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2463 maybe_suffix_2 = "2";
2464
2465 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2466 maybe_suffix_2);
2467
2468 set_conv_libfunc (optable, to, from, buffer);
2469 }
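
/* Example (illustrative, not part of the original source): a conversion from
   QQmode to HQmode keeps the same signedness and both are fract modes, so
   fract_optab gets the name "__gnu_fractqqhq2" ("__gnu_" + funcname +
   fromname + toname + "2"), whereas a conversion from QQmode to SImode is
   not fixed-point-to-fixed-point and so gets plain "__gnu_fractqqsi".  */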
2470
2471 /* Set up library functions unique to ARM. */
2472
2473 static void
2474 arm_init_libfuncs (void)
2475 {
2476 /* For Linux, we have access to kernel support for atomic operations. */
2477 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2478 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2479
2480 /* There are no special library functions unless we are using the
2481 ARM BPABI. */
2482 if (!TARGET_BPABI)
2483 return;
2484
2485 /* The functions below are described in Section 4 of the "Run-Time
2486 ABI for the ARM architecture", Version 1.0. */
2487
2488 /* Double-precision floating-point arithmetic. Table 2. */
2489 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2490 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2491 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2492 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2493 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2494
2495 /* Double-precision comparisons. Table 3. */
2496 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2497 set_optab_libfunc (ne_optab, DFmode, NULL);
2498 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2499 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2500 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2501 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2502 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2503
2504 /* Single-precision floating-point arithmetic. Table 4. */
2505 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2506 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2507 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2508 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2509 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2510
2511 /* Single-precision comparisons. Table 5. */
2512 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2513 set_optab_libfunc (ne_optab, SFmode, NULL);
2514 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2515 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2516 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2517 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2518 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2519
2520 /* Floating-point to integer conversions. Table 6. */
2521 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2522 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2523 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2524 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2525 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2526 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2527 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2528 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2529
2530 /* Conversions between floating types. Table 7. */
2531 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2532 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2533
2534 /* Integer to floating-point conversions. Table 8. */
2535 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2537 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2538 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2539 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2541 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2542 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2543
2544 /* Long long. Table 9. */
2545 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2546 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2547 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2548 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2549 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2550 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2551 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2552 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2553
2554 /* Integer (32/32->32) division. \S 4.3.1. */
2555 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2556 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2557
2558 /* The divmod functions are designed so that they can be used for
2559 plain division, even though they return both the quotient and the
2560 remainder. The quotient is returned in the usual location (i.e.,
2561 r0 for SImode, {r0, r1} for DImode), just as would be expected
2562 for an ordinary division routine. Because the AAPCS calling
2563 conventions specify that all of { r0, r1, r2, r3 } are
2564     call-clobbered registers, there is no need to tell the compiler
2565 explicitly that those registers are clobbered by these
2566 routines. */
2567 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2568 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2569
2570 /* For SImode division the ABI provides div-without-mod routines,
2571 which are faster. */
2572 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2573 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2574
2575 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2576 divmod libcalls instead. */
2577 set_optab_libfunc (smod_optab, DImode, NULL);
2578 set_optab_libfunc (umod_optab, DImode, NULL);
2579 set_optab_libfunc (smod_optab, SImode, NULL);
2580 set_optab_libfunc (umod_optab, SImode, NULL);
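
  /* Illustrative sketch (not part of the original source; the register
     details are per the run-time ABI, not this file): with the settings
     above, a signed SImode "a % b" has no dedicated mod libcall and is
     expanded via __aeabi_idivmod, which returns the quotient in r0 and the
     remainder in r1; the remainder is then simply taken from r1.  A plain
     "a / b" instead uses the faster __aeabi_idiv registered above.  */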
2581
2582 /* Half-precision float operations. The compiler handles all operations
2583      with NULL libfuncs by converting to SFmode.  */
2584 switch (arm_fp16_format)
2585 {
2586 case ARM_FP16_FORMAT_IEEE:
2587 case ARM_FP16_FORMAT_ALTERNATIVE:
2588
2589 /* Conversions. */
2590 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2591 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2592 ? "__gnu_f2h_ieee"
2593 : "__gnu_f2h_alternative"));
2594 set_conv_libfunc (sext_optab, SFmode, HFmode,
2595 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2596 ? "__gnu_h2f_ieee"
2597 : "__gnu_h2f_alternative"));
2598
2599 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2600 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2601 ? "__gnu_d2h_ieee"
2602 : "__gnu_d2h_alternative"));
2603
2604 /* Arithmetic. */
2605 set_optab_libfunc (add_optab, HFmode, NULL);
2606 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2607 set_optab_libfunc (smul_optab, HFmode, NULL);
2608 set_optab_libfunc (neg_optab, HFmode, NULL);
2609 set_optab_libfunc (sub_optab, HFmode, NULL);
2610
2611 /* Comparisons. */
2612 set_optab_libfunc (eq_optab, HFmode, NULL);
2613 set_optab_libfunc (ne_optab, HFmode, NULL);
2614 set_optab_libfunc (lt_optab, HFmode, NULL);
2615 set_optab_libfunc (le_optab, HFmode, NULL);
2616 set_optab_libfunc (ge_optab, HFmode, NULL);
2617 set_optab_libfunc (gt_optab, HFmode, NULL);
2618 set_optab_libfunc (unord_optab, HFmode, NULL);
2619 break;
2620
2621 default:
2622 break;
2623 }
2624
2625 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2626 {
2627 const arm_fixed_mode_set fixed_arith_modes[] =
2628 {
2629 { E_QQmode, "qq" },
2630 { E_UQQmode, "uqq" },
2631 { E_HQmode, "hq" },
2632 { E_UHQmode, "uhq" },
2633 { E_SQmode, "sq" },
2634 { E_USQmode, "usq" },
2635 { E_DQmode, "dq" },
2636 { E_UDQmode, "udq" },
2637 { E_TQmode, "tq" },
2638 { E_UTQmode, "utq" },
2639 { E_HAmode, "ha" },
2640 { E_UHAmode, "uha" },
2641 { E_SAmode, "sa" },
2642 { E_USAmode, "usa" },
2643 { E_DAmode, "da" },
2644 { E_UDAmode, "uda" },
2645 { E_TAmode, "ta" },
2646 { E_UTAmode, "uta" }
2647 };
2648 const arm_fixed_mode_set fixed_conv_modes[] =
2649 {
2650 { E_QQmode, "qq" },
2651 { E_UQQmode, "uqq" },
2652 { E_HQmode, "hq" },
2653 { E_UHQmode, "uhq" },
2654 { E_SQmode, "sq" },
2655 { E_USQmode, "usq" },
2656 { E_DQmode, "dq" },
2657 { E_UDQmode, "udq" },
2658 { E_TQmode, "tq" },
2659 { E_UTQmode, "utq" },
2660 { E_HAmode, "ha" },
2661 { E_UHAmode, "uha" },
2662 { E_SAmode, "sa" },
2663 { E_USAmode, "usa" },
2664 { E_DAmode, "da" },
2665 { E_UDAmode, "uda" },
2666 { E_TAmode, "ta" },
2667 { E_UTAmode, "uta" },
2668 { E_QImode, "qi" },
2669 { E_HImode, "hi" },
2670 { E_SImode, "si" },
2671 { E_DImode, "di" },
2672 { E_TImode, "ti" },
2673 { E_SFmode, "sf" },
2674 { E_DFmode, "df" }
2675 };
2676 unsigned int i, j;
2677
2678 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2679 {
2680 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2681 "add", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2683 "ssadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2685 "usadd", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2687 "sub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2689 "sssub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2691 "ussub", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2693 "mul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2695 "ssmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2697 "usmul", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2699 "div", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2701 "udiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2703 "ssdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2705 "usdiv", fixed_arith_modes[i].name, 3);
2706 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2707 "neg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2709 "ssneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2711 "usneg", fixed_arith_modes[i].name, 2);
2712 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2713 "ashl", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2715 "ashr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2717 "lshr", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2719 "ssashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2721 "usashl", fixed_arith_modes[i].name, 3);
2722 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2723 "cmp", fixed_arith_modes[i].name, 2);
2724 }
2725
2726 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2727 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2728 {
2729 if (i == j
2730 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2731 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2732 continue;
2733
2734 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2735 fixed_conv_modes[j].mode, "fract",
2736 fixed_conv_modes[i].name,
2737 fixed_conv_modes[j].name);
2738 arm_set_fixed_conv_libfunc (satfract_optab,
2739 fixed_conv_modes[i].mode,
2740 fixed_conv_modes[j].mode, "satfract",
2741 fixed_conv_modes[i].name,
2742 fixed_conv_modes[j].name);
2743 arm_set_fixed_conv_libfunc (fractuns_optab,
2744 fixed_conv_modes[i].mode,
2745 fixed_conv_modes[j].mode, "fractuns",
2746 fixed_conv_modes[i].name,
2747 fixed_conv_modes[j].name);
2748 arm_set_fixed_conv_libfunc (satfractuns_optab,
2749 fixed_conv_modes[i].mode,
2750 fixed_conv_modes[j].mode, "satfractuns",
2751 fixed_conv_modes[i].name,
2752 fixed_conv_modes[j].name);
2753 }
2754 }
2755
2756 if (TARGET_AAPCS_BASED)
2757 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2758 }
2759
2760 /* On AAPCS systems, this is the "struct __va_list". */
2761 static GTY(()) tree va_list_type;
2762
2763 /* Return the type to use as __builtin_va_list. */
2764 static tree
2765 arm_build_builtin_va_list (void)
2766 {
2767 tree va_list_name;
2768 tree ap_field;
2769
2770 if (!TARGET_AAPCS_BASED)
2771 return std_build_builtin_va_list ();
2772
2773 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2774 defined as:
2775
2776 struct __va_list
2777 {
2778 void *__ap;
2779 };
2780
2781 The C Library ABI further reinforces this definition in \S
2782 4.1.
2783
2784 We must follow this definition exactly. The structure tag
2785 name is visible in C++ mangled names, and thus forms a part
2786 of the ABI. The field name may be used by people who
2787 #include <stdarg.h>. */
2788 /* Create the type. */
2789 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2790 /* Give it the required name. */
2791 va_list_name = build_decl (BUILTINS_LOCATION,
2792 TYPE_DECL,
2793 get_identifier ("__va_list"),
2794 va_list_type);
2795 DECL_ARTIFICIAL (va_list_name) = 1;
2796 TYPE_NAME (va_list_type) = va_list_name;
2797 TYPE_STUB_DECL (va_list_type) = va_list_name;
2798 /* Create the __ap field. */
2799 ap_field = build_decl (BUILTINS_LOCATION,
2800 FIELD_DECL,
2801 get_identifier ("__ap"),
2802 ptr_type_node);
2803 DECL_ARTIFICIAL (ap_field) = 1;
2804 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2805 TYPE_FIELDS (va_list_type) = ap_field;
2806 /* Compute its layout. */
2807 layout_type (va_list_type);
2808
2809 return va_list_type;
2810 }
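
/* Illustrative note (assumption based on the ARM C++ ABI, not taken from
   this file): because the "__va_list" tag is ABI-visible, a C++ declaration
   such as

     void f (va_list);   // mangles to _Z1fSt9__va_list

   encodes the tag as if the struct lived in namespace std, which is why the
   type built above must use exactly this name.  */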
2811
2812 /* Return an expression of type "void *" pointing to the next
2813 available argument in a variable-argument list. VALIST is the
2814 user-level va_list object, of type __builtin_va_list. */
2815 static tree
2816 arm_extract_valist_ptr (tree valist)
2817 {
2818 if (TREE_TYPE (valist) == error_mark_node)
2819 return error_mark_node;
2820
2821 /* On an AAPCS target, the pointer is stored within "struct
2822 va_list". */
2823 if (TARGET_AAPCS_BASED)
2824 {
2825 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2826 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2827 valist, ap_field, NULL_TREE);
2828 }
2829
2830 return valist;
2831 }
2832
2833 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2834 static void
2835 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2836 {
2837 valist = arm_extract_valist_ptr (valist);
2838 std_expand_builtin_va_start (valist, nextarg);
2839 }
2840
2841 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2842 static tree
2843 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2844 gimple_seq *post_p)
2845 {
2846 valist = arm_extract_valist_ptr (valist);
2847 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2848 }
2849
2850 /* Check any incompatible options that the user has specified. */
2851 static void
2852 arm_option_check_internal (struct gcc_options *opts)
2853 {
2854 int flags = opts->x_target_flags;
2855
2856 /* iWMMXt and NEON are incompatible. */
2857 if (TARGET_IWMMXT
2858 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2859 error ("iWMMXt and NEON are incompatible");
2860
2861 /* Make sure that the processor choice does not conflict with any of the
2862 other command line choices. */
2863 if (TARGET_ARM_P (flags)
2864 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2865 error ("target CPU does not support ARM mode");
2866
2867 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2868 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2869 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2870
2871 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2872 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2873
2874 /* If this target is normally configured to use APCS frames, warn if they
2875 are turned off and debugging is turned on. */
2876 if (TARGET_ARM_P (flags)
2877 && write_symbols != NO_DEBUG
2878 && !TARGET_APCS_FRAME
2879 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2880 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2881
2882 /* iWMMXt unsupported under Thumb mode. */
2883 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2884 error ("iWMMXt unsupported under Thumb mode");
2885
2886 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2887 error ("can not use -mtp=cp15 with 16-bit Thumb");
2888
2889 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2890 {
2891 error ("RTP PIC is incompatible with Thumb");
2892 flag_pic = 0;
2893 }
2894
2895 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2896 with MOVT. */
2897 if ((target_pure_code || target_slow_flash_data)
2898 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2899 {
2900 const char *flag = (target_pure_code ? "-mpure-code" :
2901 "-mslow-flash-data");
2902 error ("%s only supports non-pic code on M-profile targets with the "
2903 "MOVT instruction", flag);
2904 }
2905
2906 }
2907
2908 /* Recompute the global settings depending on target attribute options. */
2909
2910 static void
2911 arm_option_params_internal (void)
2912 {
2913 /* If we are not using the default (ARM mode) section anchor offset
2914 ranges, then set the correct ranges now. */
2915 if (TARGET_THUMB1)
2916 {
2917 /* Thumb-1 LDR instructions cannot have negative offsets.
2918 Permissible positive offset ranges are 5-bit (for byte loads),
2919 6-bit (for halfword loads), or 7-bit (for word loads).
2920 Empirical results suggest a 7-bit anchor range gives the best
2921 overall code size. */
2922 targetm.min_anchor_offset = 0;
2923 targetm.max_anchor_offset = 127;
2924 }
2925 else if (TARGET_THUMB2)
2926 {
2927       /* The minimum is set such that the total size of the block for a
2928 	 particular anchor is 248 + 1 + 4095 = 4344 bytes, which is divisible
2929 	 by eight (4344 = 8 * 543), ensuring natural spacing of anchors.  */
2930 targetm.min_anchor_offset = -248;
2931 targetm.max_anchor_offset = 4095;
2932 }
2933 else
2934 {
2935 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2936 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2937 }
2938
2939 /* Increase the number of conditional instructions with -Os. */
2940 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2941
2942 /* For THUMB2, we limit the conditional sequence to one IT block. */
2943 if (TARGET_THUMB2)
2944 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2945 }
2946
2947 /* True if -mflip-thumb should next add an attribute for the default
2948 mode, false if it should next add an attribute for the opposite mode. */
2949 static GTY(()) bool thumb_flipper;
2950
2951 /* Options after initial target override. */
2952 static GTY(()) tree init_optimize;
2953
2954 static void
2955 arm_override_options_after_change_1 (struct gcc_options *opts)
2956 {
2957 if (opts->x_align_functions <= 0)
2958 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2959 && opts->x_optimize_size ? 2 : 4;
2960 }
2961
2962 /* Implement targetm.override_options_after_change. */
2963
2964 static void
2965 arm_override_options_after_change (void)
2966 {
2967 arm_configure_build_target (&arm_active_target,
2968 TREE_TARGET_OPTION (target_option_default_node),
2969 &global_options_set, false);
2970
2971 arm_override_options_after_change_1 (&global_options);
2972 }
2973
2974 /* Implement TARGET_OPTION_SAVE. */
2975 static void
2976 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2977 {
2978 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2979 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2980 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2981 }
2982
2983 /* Implement TARGET_OPTION_RESTORE. */
2984 static void
2985 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2986 {
2987 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2988 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2989 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2990 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2991 false);
2992 }
2993
2994 /* Reset options between modes that the user has specified. */
2995 static void
2996 arm_option_override_internal (struct gcc_options *opts,
2997 struct gcc_options *opts_set)
2998 {
2999 arm_override_options_after_change_1 (opts);
3000
3001 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3002 {
3003 /* The default is to enable interworking, so this warning message would
3004 	 be confusing to users who have just compiled with, e.g., -march=armv3.  */
3005 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3006 opts->x_target_flags &= ~MASK_INTERWORK;
3007 }
3008
3009 if (TARGET_THUMB_P (opts->x_target_flags)
3010 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3011 {
3012 warning (0, "target CPU does not support THUMB instructions");
3013 opts->x_target_flags &= ~MASK_THUMB;
3014 }
3015
3016 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3017 {
3018 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3019 opts->x_target_flags &= ~MASK_APCS_FRAME;
3020 }
3021
3022 /* Callee super interworking implies thumb interworking. Adding
3023 this to the flags here simplifies the logic elsewhere. */
3024 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3025 opts->x_target_flags |= MASK_INTERWORK;
3026
3027   /* We need to remember the initial values so that combinations of options
3028      like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3029 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3030
3031 if (! opts_set->x_arm_restrict_it)
3032 opts->x_arm_restrict_it = arm_arch8;
3033
3034   /* Restricted IT does not apply in ARM state or on M-profile targets.  */
3035 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3036 opts->x_arm_restrict_it = 0;
3037
3038 /* Enable -munaligned-access by default for
3039      - all ARMv6 architecture-based processors when compiling for a 32-bit
3040        ISA, i.e. Thumb-2 and ARM state only.
3041      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3042      - ARMv8 architecture-based processors.
3043
3044 Disable -munaligned-access by default for
3045 - all pre-ARMv6 architecture-based processors
3046 - ARMv6-M architecture-based processors
3047 - ARMv8-M Baseline processors. */
3048
3049 if (! opts_set->x_unaligned_access)
3050 {
3051 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3052 && arm_arch6 && (arm_arch_notm || arm_arch7));
3053 }
3054 else if (opts->x_unaligned_access == 1
3055 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3056 {
3057 warning (0, "target CPU does not support unaligned accesses");
3058 opts->x_unaligned_access = 0;
3059 }
3060
3061 /* Don't warn since it's on by default in -O2. */
3062 if (TARGET_THUMB1_P (opts->x_target_flags))
3063 opts->x_flag_schedule_insns = 0;
3064 else
3065 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3066
3067 /* Disable shrink-wrap when optimizing function for size, since it tends to
3068 generate additional returns. */
3069 if (optimize_function_for_size_p (cfun)
3070 && TARGET_THUMB2_P (opts->x_target_flags))
3071 opts->x_flag_shrink_wrap = false;
3072 else
3073 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3074
3075 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3076 - epilogue_insns - does not accurately model the corresponding insns
3077 emitted in the asm file. In particular, see the comment in thumb_exit
3078 'Find out how many of the (return) argument registers we can corrupt'.
3079 As a consequence, the epilogue may clobber registers without fipa-ra
3080 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3081 TODO: Accurately model clobbers for epilogue_insns and reenable
3082 fipa-ra. */
3083 if (TARGET_THUMB1_P (opts->x_target_flags))
3084 opts->x_flag_ipa_ra = 0;
3085 else
3086 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3087
3088 /* Thumb2 inline assembly code should always use unified syntax.
3089 This will apply to ARM and Thumb1 eventually. */
3090 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3091
3092 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3093 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3094 #endif
3095 }
3096
3097 static sbitmap isa_all_fpubits;
3098 static sbitmap isa_quirkbits;
3099
3100 /* Configure a build target TARGET from the user-specified options OPTS and
3101 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3102 architecture have been specified, but the two are not identical. */
3103 void
3104 arm_configure_build_target (struct arm_build_target *target,
3105 struct cl_target_option *opts,
3106 struct gcc_options *opts_set,
3107 bool warn_compatible)
3108 {
3109 const cpu_option *arm_selected_tune = NULL;
3110 const arch_option *arm_selected_arch = NULL;
3111 const cpu_option *arm_selected_cpu = NULL;
3112 const arm_fpu_desc *arm_selected_fpu = NULL;
3113 const char *tune_opts = NULL;
3114 const char *arch_opts = NULL;
3115 const char *cpu_opts = NULL;
3116
3117 bitmap_clear (target->isa);
3118 target->core_name = NULL;
3119 target->arch_name = NULL;
3120
3121 if (opts_set->x_arm_arch_string)
3122 {
3123 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3124 "-march",
3125 opts->x_arm_arch_string);
3126 arch_opts = strchr (opts->x_arm_arch_string, '+');
3127 }
3128
3129 if (opts_set->x_arm_cpu_string)
3130 {
3131 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3132 opts->x_arm_cpu_string);
3133 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3134 arm_selected_tune = arm_selected_cpu;
3135 /* If taking the tuning from -mcpu, we don't need to rescan the
3136 options for tuning. */
3137 }
3138
3139 if (opts_set->x_arm_tune_string)
3140 {
3141 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3142 opts->x_arm_tune_string);
3143 tune_opts = strchr (opts->x_arm_tune_string, '+');
3144 }
3145
3146 if (arm_selected_arch)
3147 {
3148 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3150 arch_opts);
3151
3152 if (arm_selected_cpu)
3153 {
3154 auto_sbitmap cpu_isa (isa_num_bits);
3155 auto_sbitmap isa_delta (isa_num_bits);
3156
3157 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3158 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3159 cpu_opts);
3160 bitmap_xor (isa_delta, cpu_isa, target->isa);
3161 /* Ignore any bits that are quirk bits. */
3162 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3163 /* Ignore (for now) any bits that might be set by -mfpu. */
3164 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3165
3166 if (!bitmap_empty_p (isa_delta))
3167 {
3168 if (warn_compatible)
3169 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3170 arm_selected_cpu->common.name,
3171 arm_selected_arch->common.name);
3172 /* -march wins for code generation.
3173 -mcpu wins for default tuning. */
3174 if (!arm_selected_tune)
3175 arm_selected_tune = arm_selected_cpu;
3176
3177 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3178 target->arch_name = arm_selected_arch->common.name;
3179 }
3180 else
3181 {
3182 /* Architecture and CPU are essentially the same.
3183 Prefer the CPU setting. */
3184 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3185 target->core_name = arm_selected_cpu->common.name;
3186 /* Copy the CPU's capabilities, so that we inherit the
3187 appropriate extensions and quirks. */
3188 bitmap_copy (target->isa, cpu_isa);
3189 }
3190 }
3191 else
3192 {
3193 /* Pick a CPU based on the architecture. */
3194 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3195 target->arch_name = arm_selected_arch->common.name;
3196 /* Note: target->core_name is left unset in this path. */
3197 }
3198 }
3199 else if (arm_selected_cpu)
3200 {
3201 target->core_name = arm_selected_cpu->common.name;
3202 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3203 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3204 cpu_opts);
3205 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3206 }
3207 /* If the user did not specify a processor or architecture, choose
3208 one for them. */
3209 else
3210 {
3211 const cpu_option *sel;
3212 auto_sbitmap sought_isa (isa_num_bits);
3213 bitmap_clear (sought_isa);
3214 auto_sbitmap default_isa (isa_num_bits);
3215
3216 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3217 TARGET_CPU_DEFAULT);
3218 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3219 gcc_assert (arm_selected_cpu->common.name);
3220
3221 /* RWE: All of the selection logic below (to the end of this
3222 'if' clause) looks somewhat suspect. It appears to be mostly
3223 there to support forcing thumb support when the default CPU
3224 does not have thumb (somewhat dubious in terms of what the
3225 user might be expecting). I think it should be removed once
3226 support for the pre-thumb era cores is removed. */
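/* Illustrative scenario (an assumption about how this path is reached):
   a toolchain whose default CPU lacks Thumb, invoked with -mthumb or
   -mthumb-interwork, falls into the search loops below, which look for
   the smallest superset of the default CPU's features that also provides
   the requested bits.  */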
3227 sel = arm_selected_cpu;
3228 arm_initialize_isa (default_isa, sel->common.isa_bits);
3229 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3230 cpu_opts);
3231
3232 /* Now check to see if the user has specified any command line
3233 switches that require certain abilities from the cpu. */
3234
3235 if (TARGET_INTERWORK || TARGET_THUMB)
3236 {
3237 bitmap_set_bit (sought_isa, isa_bit_thumb);
3238 bitmap_set_bit (sought_isa, isa_bit_mode32);
3239
3240 /* There are no ARM processors that support both APCS-26 and
3241 interworking. Therefore we forcibly remove MODE26 from
3242 the isa features here (if it was set), so that the
3243 search below will always be able to find a compatible
3244 processor. */
3245 bitmap_clear_bit (default_isa, isa_bit_mode26);
3246 }
3247
3248 /* If there are such requirements and the default CPU does not
3249 satisfy them, we need to run over the complete list of
3250 cores looking for one that is satisfactory. */
3251 if (!bitmap_empty_p (sought_isa)
3252 && !bitmap_subset_p (sought_isa, default_isa))
3253 {
3254 auto_sbitmap candidate_isa (isa_num_bits);
3255 /* We're only interested in a CPU with at least the
3256 capabilities of the default CPU and the required
3257 additional features. */
3258 bitmap_ior (default_isa, default_isa, sought_isa);
3259
3260 /* Try to locate a CPU type that supports all of the abilities
3261 of the default CPU, plus the extra abilities requested by
3262 the user. */
3263 for (sel = all_cores; sel->common.name != NULL; sel++)
3264 {
3265 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3266 /* An exact match? */
3267 if (bitmap_equal_p (default_isa, candidate_isa))
3268 break;
3269 }
3270
3271 if (sel->common.name == NULL)
3272 {
3273 unsigned current_bit_count = isa_num_bits;
3274 const cpu_option *best_fit = NULL;
3275
3276 /* Ideally we would like to issue an error message here
3277 saying that it was not possible to find a CPU compatible
3278 with the default CPU, but which also supports the command
3279 line options specified by the programmer, and so they
3280 ought to use the -mcpu=<name> command line option to
3281 override the default CPU type.
3282
3283 If we cannot find a CPU that has exactly the
3284 characteristics of the default CPU and the given
3285 command line options we scan the array again looking
3286 for a best match. The best match must have at least
3287 the capabilities of the perfect match. */
3288 for (sel = all_cores; sel->common.name != NULL; sel++)
3289 {
3290 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3291
3292 if (bitmap_subset_p (default_isa, candidate_isa))
3293 {
3294 unsigned count;
3295
3296 bitmap_and_compl (candidate_isa, candidate_isa,
3297 default_isa);
3298 count = bitmap_popcount (candidate_isa);
3299
3300 if (count < current_bit_count)
3301 {
3302 best_fit = sel;
3303 current_bit_count = count;
3304 }
3305 }
3306 }
3307
3308 gcc_assert (best_fit);
3309 sel = best_fit;
3310 }
3311 arm_selected_cpu = sel;
3312 }
3313
3314 /* Now we know the CPU, we can finally initialize the target
3315 structure. */
3316 target->core_name = arm_selected_cpu->common.name;
3317 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3318 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3319 cpu_opts);
3320 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3321 }
3322
3323 gcc_assert (arm_selected_cpu);
3324 gcc_assert (arm_selected_arch);
3325
3326 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3327 {
3328 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3329 auto_sbitmap fpu_bits (isa_num_bits);
3330
3331 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3332 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3333 bitmap_ior (target->isa, target->isa, fpu_bits);
3334 }
3335
3336 if (!arm_selected_tune)
3337 arm_selected_tune = arm_selected_cpu;
3338 else /* Validate the features passed to -mtune. */
3339 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3340
3341 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3342
3343 /* Finish initializing the target structure. */
3344 target->arch_pp_name = arm_selected_arch->arch;
3345 target->base_arch = arm_selected_arch->base_arch;
3346 target->profile = arm_selected_arch->profile;
3347
3348 target->tune_flags = tune_data->tune_flags;
3349 target->tune = tune_data->tune;
3350 target->tune_core = tune_data->scheduler;
3351 arm_option_reconfigure_globals ();
3352 }
3353
3354 /* Fix up any incompatible options that the user has specified. */
3355 static void
3356 arm_option_override (void)
3357 {
3358 static const enum isa_feature fpu_bitlist[]
3359 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3360 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3361 cl_target_option opts;
3362
3363 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3364 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3365
3366 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3367 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3368
3369 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3370
3371 if (!global_options_set.x_arm_fpu_index)
3372 {
3373 bool ok;
3374 int fpu_index;
3375
3376 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3377 CL_TARGET);
3378 gcc_assert (ok);
3379 arm_fpu_index = (enum fpu_type) fpu_index;
3380 }
3381
3382 cl_target_option_save (&opts, &global_options);
3383 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3384 true);
3385
3386 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3387 SUBTARGET_OVERRIDE_OPTIONS;
3388 #endif
3389
3390 /* Initialize boolean versions of the architectural flags, for use
3391 in the arm.md file and for enabling feature flags. */
3392 arm_option_reconfigure_globals ();
3393
3394 arm_tune = arm_active_target.tune_core;
3395 tune_flags = arm_active_target.tune_flags;
3396 current_tune = arm_active_target.tune;
3397
3398 /* TBD: Dwarf info for apcs frame is not handled yet. */
3399 if (TARGET_APCS_FRAME)
3400 flag_shrink_wrap = false;
3401
3402 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3403 {
3404 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3405 target_flags |= MASK_APCS_FRAME;
3406 }
3407
3408 if (TARGET_POKE_FUNCTION_NAME)
3409 target_flags |= MASK_APCS_FRAME;
3410
3411 if (TARGET_APCS_REENT && flag_pic)
3412 error ("-fpic and -mapcs-reent are incompatible");
3413
3414 if (TARGET_APCS_REENT)
3415 warning (0, "APCS reentrant code not supported. Ignored");
3416
3417 /* Set up some tuning parameters. */
3418 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3419 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3420 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3421 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3422 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3423 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3424
3425 /* For arm2/3 there is no need to do any scheduling if we are doing
3426 software floating-point. */
3427 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3428 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3429
3430 /* Override the default structure alignment for AAPCS ABI. */
3431 if (!global_options_set.x_arm_structure_size_boundary)
3432 {
3433 if (TARGET_AAPCS_BASED)
3434 arm_structure_size_boundary = 8;
3435 }
3436 else
3437 {
3438 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3439
3440 if (arm_structure_size_boundary != 8
3441 && arm_structure_size_boundary != 32
3442 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3443 {
3444 if (ARM_DOUBLEWORD_ALIGN)
3445 warning (0,
3446 "structure size boundary can only be set to 8, 32 or 64");
3447 else
3448 warning (0, "structure size boundary can only be set to 8 or 32");
3449 arm_structure_size_boundary
3450 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3451 }
3452 }
3453
3454 if (TARGET_VXWORKS_RTP)
3455 {
3456 if (!global_options_set.x_arm_pic_data_is_text_relative)
3457 arm_pic_data_is_text_relative = 0;
3458 }
3459 else if (flag_pic
3460 && !arm_pic_data_is_text_relative
3461 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3462 /* When text & data segments don't have a fixed displacement, the
3463 intended use is with a single, read only, pic base register.
3464 Unless the user explicitly requested not to do that, set
3465 it. */
3466 target_flags |= MASK_SINGLE_PIC_BASE;
3467
3468 /* If stack checking is disabled, we can use r10 as the PIC register,
3469 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3470 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3471 {
3472 if (TARGET_VXWORKS_RTP)
3473 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3474 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3475 }
3476
3477 if (flag_pic && TARGET_VXWORKS_RTP)
3478 arm_pic_register = 9;
3479
3480 if (arm_pic_register_string != NULL)
3481 {
3482 int pic_register = decode_reg_name (arm_pic_register_string);
3483
3484 if (!flag_pic)
3485 warning (0, "-mpic-register= is useless without -fpic");
3486
3487 /* Prevent the user from choosing an obviously stupid PIC register. */
3488 else if (pic_register < 0 || call_used_regs[pic_register]
3489 || pic_register == HARD_FRAME_POINTER_REGNUM
3490 || pic_register == STACK_POINTER_REGNUM
3491 || pic_register >= PC_REGNUM
3492 || (TARGET_VXWORKS_RTP
3493 && (unsigned int) pic_register != arm_pic_register))
3494 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3495 else
3496 arm_pic_register = pic_register;
3497 }
3498
3499 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3500 if (fix_cm3_ldrd == 2)
3501 {
3502 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3503 fix_cm3_ldrd = 1;
3504 else
3505 fix_cm3_ldrd = 0;
3506 }
3507
3508 /* Hot/Cold partitioning is not currently supported, since we can't
3509 handle literal pool placement in that case. */
3510 if (flag_reorder_blocks_and_partition)
3511 {
3512 inform (input_location,
3513 "-freorder-blocks-and-partition not supported on this architecture");
3514 flag_reorder_blocks_and_partition = 0;
3515 flag_reorder_blocks = 1;
3516 }
3517
3518 if (flag_pic)
3519 /* Hoisting PIC address calculations more aggressively provides a small,
3520 but measurable, size reduction for PIC code. Therefore, we decrease
3521 the bar for unrestricted expression hoisting to the cost of PIC address
3522 calculation, which is 2 instructions. */
3523 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3524 global_options.x_param_values,
3525 global_options_set.x_param_values);
3526
3527 /* ARM EABI defaults to strict volatile bitfields. */
3528 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3529 && abi_version_at_least(2))
3530 flag_strict_volatile_bitfields = 1;
3531
3532 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3533 have deemed it beneficial (signified by setting
3534 prefetch.num_slots to 1 or more). */
3535 if (flag_prefetch_loop_arrays < 0
3536 && HAVE_prefetch
3537 && optimize >= 3
3538 && current_tune->prefetch.num_slots > 0)
3539 flag_prefetch_loop_arrays = 1;
3540
3541 /* Set up parameters to be used in prefetching algorithm. Do not
3542 override the defaults unless we are tuning for a core we have
3543 researched values for. */
3544 if (current_tune->prefetch.num_slots > 0)
3545 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3546 current_tune->prefetch.num_slots,
3547 global_options.x_param_values,
3548 global_options_set.x_param_values);
3549 if (current_tune->prefetch.l1_cache_line_size >= 0)
3550 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3551 current_tune->prefetch.l1_cache_line_size,
3552 global_options.x_param_values,
3553 global_options_set.x_param_values);
3554 if (current_tune->prefetch.l1_cache_size >= 0)
3555 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3556 current_tune->prefetch.l1_cache_size,
3557 global_options.x_param_values,
3558 global_options_set.x_param_values);
3559
3560 /* Use Neon to perform 64-bit operations rather than core
3561 registers. */
3562 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3563 if (use_neon_for_64bits == 1)
3564 prefer_neon_for_64bits = true;
3565
3566 /* Use the alternative scheduling-pressure algorithm by default. */
3567 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3568 global_options.x_param_values,
3569 global_options_set.x_param_values);
3570
3571 /* Look through ready list and all of queue for instructions
3572 relevant for L2 auto-prefetcher. */
3573 int param_sched_autopref_queue_depth;
3574
3575 switch (current_tune->sched_autopref)
3576 {
3577 case tune_params::SCHED_AUTOPREF_OFF:
3578 param_sched_autopref_queue_depth = -1;
3579 break;
3580
3581 case tune_params::SCHED_AUTOPREF_RANK:
3582 param_sched_autopref_queue_depth = 0;
3583 break;
3584
3585 case tune_params::SCHED_AUTOPREF_FULL:
3586 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3587 break;
3588
3589 default:
3590 gcc_unreachable ();
3591 }
3592
3593 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3594 param_sched_autopref_queue_depth,
3595 global_options.x_param_values,
3596 global_options_set.x_param_values);
3597
3598 /* Currently, for slow flash data, we just disable literal pools. We also
3599 disable them for pure-code. */
3600 if (target_slow_flash_data || target_pure_code)
3601 arm_disable_literal_pool = true;
3602
3603 /* Disable scheduling fusion by default if the target is not an armv7
3604 processor or does not prefer ldrd/strd. */
3605 if (flag_schedule_fusion == 2
3606 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3607 flag_schedule_fusion = 0;
3608
3609 /* Need to remember initial options before they are overridden. */
3610 init_optimize = build_optimization_node (&global_options);
3611
3612 arm_options_perform_arch_sanity_checks ();
3613 arm_option_override_internal (&global_options, &global_options_set);
3614 arm_option_check_internal (&global_options);
3615 arm_option_params_internal ();
3616
3617 /* Create the default target_options structure. */
3618 target_option_default_node = target_option_current_node
3619 = build_target_option_node (&global_options);
3620
3621 /* Register global variables with the garbage collector. */
3622 arm_add_gc_roots ();
3623
3624 /* Init initial mode for testing. */
3625 thumb_flipper = TARGET_THUMB;
3626 }
3627
3628
3629 /* Reconfigure global status flags from the active_target.isa. */
3630 void
3631 arm_option_reconfigure_globals (void)
3632 {
3633 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3634 arm_base_arch = arm_active_target.base_arch;
3635
3636 /* Initialize boolean versions of the architectural flags, for use
3637 in the arm.md file. */
3638 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3639 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3640 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3641 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3642 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3643 arm_arch5te = arm_arch5e
3644 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3645 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3646 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3647 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3648 arm_arch6m = arm_arch6 && !arm_arch_notm;
3649 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3650 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3651 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3652 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3653 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3654 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3655 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3656 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3657 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3658 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3659 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3660 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3661 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3662 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3663 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3664 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3665 if (arm_fp16_inst)
3666 {
3667 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3668 error ("selected fp16 options are incompatible");
3669 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3670 }
3671
3672 /* And finally, set up some quirks. */
3673 arm_arch_no_volatile_ce
3674 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3675 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3676 isa_bit_quirk_armv6kz);
3677
3678 /* Use the cp15 method if it is available. */
3679 if (target_thread_pointer == TP_AUTO)
3680 {
3681 if (arm_arch6k && !TARGET_THUMB1)
3682 target_thread_pointer = TP_CP15;
3683 else
3684 target_thread_pointer = TP_SOFT;
3685 }
3686 }
3687
3688 /* Perform some validation between the desired architecture and the rest of the
3689 options. */
3690 void
3691 arm_options_perform_arch_sanity_checks (void)
3692 {
3693 /* V5 code we generate is completely interworking capable, so we turn off
3694 TARGET_INTERWORK here to avoid many tests later on. */
3695
3696 /* XXX However, we must pass the right pre-processor defines to CPP
3697 or GLD can get confused. This is a hack. */
3698 if (TARGET_INTERWORK)
3699 arm_cpp_interwork = 1;
3700
3701 if (arm_arch5)
3702 target_flags &= ~MASK_INTERWORK;
3703
3704 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3705 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3706
3707 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3708 error ("iwmmxt abi requires an iwmmxt capable cpu");
3709
3710 /* BPABI targets use linker tricks to allow interworking on cores
3711 without thumb support. */
3712 if (TARGET_INTERWORK
3713 && !TARGET_BPABI
3714 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3715 {
3716 warning (0, "target CPU does not support interworking" );
3717 target_flags &= ~MASK_INTERWORK;
3718 }
3719
3720 /* If soft-float is specified then don't use FPU. */
3721 if (TARGET_SOFT_FLOAT)
3722 arm_fpu_attr = FPU_NONE;
3723 else
3724 arm_fpu_attr = FPU_VFP;
3725
3726 if (TARGET_AAPCS_BASED)
3727 {
3728 if (TARGET_CALLER_INTERWORKING)
3729 error ("AAPCS does not support -mcaller-super-interworking");
3730 else
3731 if (TARGET_CALLEE_INTERWORKING)
3732 error ("AAPCS does not support -mcallee-super-interworking");
3733 }
3734
3735 /* __fp16 support currently assumes the core has ldrh. */
3736 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3737 sorry ("__fp16 and no ldrh");
3738
3739 if (use_cmse && !arm_arch_cmse)
3740 error ("target CPU does not support ARMv8-M Security Extensions");
3741
3742 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3743 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3744 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3745 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3746
3747
3748 if (TARGET_AAPCS_BASED)
3749 {
3750 if (arm_abi == ARM_ABI_IWMMXT)
3751 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3752 else if (TARGET_HARD_FLOAT_ABI)
3753 {
3754 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3755 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3756 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3757 }
3758 else
3759 arm_pcs_default = ARM_PCS_AAPCS;
3760 }
3761 else
3762 {
3763 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3764 sorry ("-mfloat-abi=hard and VFP");
3765
3766 if (arm_abi == ARM_ABI_APCS)
3767 arm_pcs_default = ARM_PCS_APCS;
3768 else
3769 arm_pcs_default = ARM_PCS_ATPCS;
3770 }
3771 }
3772
3773 static void
3774 arm_add_gc_roots (void)
3775 {
3776 gcc_obstack_init(&minipool_obstack);
3777 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3778 }
3779 \f
3780 /* A table of known ARM exception types.
3781 For use with the interrupt function attribute. */
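/* A minimal usage sketch (hypothetical user code, not part of GCC):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is matched against this table by arm_isr_value
   below.  */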
3782
3783 typedef struct
3784 {
3785 const char *const arg;
3786 const unsigned long return_value;
3787 }
3788 isr_attribute_arg;
3789
3790 static const isr_attribute_arg isr_attribute_args [] =
3791 {
3792 { "IRQ", ARM_FT_ISR },
3793 { "irq", ARM_FT_ISR },
3794 { "FIQ", ARM_FT_FIQ },
3795 { "fiq", ARM_FT_FIQ },
3796 { "ABORT", ARM_FT_ISR },
3797 { "abort", ARM_FT_ISR },
3798 { "ABORT", ARM_FT_ISR },
3799 { "abort", ARM_FT_ISR },
3800 { "UNDEF", ARM_FT_EXCEPTION },
3801 { "undef", ARM_FT_EXCEPTION },
3802 { "SWI", ARM_FT_EXCEPTION },
3803 { "swi", ARM_FT_EXCEPTION },
3804 { NULL, ARM_FT_NORMAL }
3805 };
3806
3807 /* Returns the (interrupt) function type of the current
3808 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3809
3810 static unsigned long
3811 arm_isr_value (tree argument)
3812 {
3813 const isr_attribute_arg * ptr;
3814 const char * arg;
3815
3816 if (!arm_arch_notm)
3817 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3818
3819 /* No argument - default to IRQ. */
3820 if (argument == NULL_TREE)
3821 return ARM_FT_ISR;
3822
3823 /* Get the value of the argument. */
3824 if (TREE_VALUE (argument) == NULL_TREE
3825 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3826 return ARM_FT_UNKNOWN;
3827
3828 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3829
3830 /* Check it against the list of known arguments. */
3831 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3832 if (streq (arg, ptr->arg))
3833 return ptr->return_value;
3834
3835 /* An unrecognized interrupt type. */
3836 return ARM_FT_UNKNOWN;
3837 }
3838
3839 /* Computes the type of the current function. */
3840
3841 static unsigned long
3842 arm_compute_func_type (void)
3843 {
3844 unsigned long type = ARM_FT_UNKNOWN;
3845 tree a;
3846 tree attr;
3847
3848 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3849
3850 /* Decide if the current function is volatile. Such functions
3851 never return, and many memory cycles can be saved by not storing
3852 register values that will never be needed again. This optimization
3853 was added to speed up context switching in a kernel application. */
3854 if (optimize > 0
3855 && (TREE_NOTHROW (current_function_decl)
3856 || !(flag_unwind_tables
3857 || (flag_exceptions
3858 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3859 && TREE_THIS_VOLATILE (current_function_decl))
3860 type |= ARM_FT_VOLATILE;
3861
3862 if (cfun->static_chain_decl != NULL)
3863 type |= ARM_FT_NESTED;
3864
3865 attr = DECL_ATTRIBUTES (current_function_decl);
3866
3867 a = lookup_attribute ("naked", attr);
3868 if (a != NULL_TREE)
3869 type |= ARM_FT_NAKED;
3870
3871 a = lookup_attribute ("isr", attr);
3872 if (a == NULL_TREE)
3873 a = lookup_attribute ("interrupt", attr);
3874
3875 if (a == NULL_TREE)
3876 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3877 else
3878 type |= arm_isr_value (TREE_VALUE (a));
3879
3880 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3881 type |= ARM_FT_CMSE_ENTRY;
3882
3883 return type;
3884 }
3885
3886 /* Returns the type of the current function. */
3887
3888 unsigned long
3889 arm_current_func_type (void)
3890 {
3891 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3892 cfun->machine->func_type = arm_compute_func_type ();
3893
3894 return cfun->machine->func_type;
3895 }
3896
3897 bool
3898 arm_allocate_stack_slots_for_args (void)
3899 {
3900 /* Naked functions should not allocate stack slots for arguments. */
3901 return !IS_NAKED (arm_current_func_type ());
3902 }
3903
3904 static bool
3905 arm_warn_func_return (tree decl)
3906 {
3907 /* Naked functions are implemented entirely in assembly, including the
3908 return sequence, so suppress warnings about this. */
3909 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3910 }
3911
3912 \f
3913 /* Output assembler code for a block containing the constant parts
3914 of a trampoline, leaving space for the variable parts.
3915
3916 On the ARM, (if r8 is the static chain regnum, and remembering that
3917 referencing pc adds an offset of 8) the trampoline looks like:
3918 ldr r8, [pc, #0]
3919 ldr pc, [pc]
3920 .word static chain value
3921 .word function's address
3922 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
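/* For reference (derived from arm_trampoline_init below): the two .word
   slots sit at offsets 8 and 12 in the 32-bit (ARM and Thumb-2)
   templates, and at offsets 12 and 16 in the Thumb-1 template.  */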
3923
3924 static void
3925 arm_asm_trampoline_template (FILE *f)
3926 {
3927 fprintf (f, "\t.syntax unified\n");
3928
3929 if (TARGET_ARM)
3930 {
3931 fprintf (f, "\t.arm\n");
3932 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3933 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3934 }
3935 else if (TARGET_THUMB2)
3936 {
3937 fprintf (f, "\t.thumb\n");
3938 /* The Thumb-2 trampoline is similar to the arm implementation.
3939 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3940 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3941 STATIC_CHAIN_REGNUM, PC_REGNUM);
3942 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3943 }
3944 else
3945 {
3946 ASM_OUTPUT_ALIGN (f, 2);
3947 fprintf (f, "\t.code\t16\n");
3948 fprintf (f, ".Ltrampoline_start:\n");
3949 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3950 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3951 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3952 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3953 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3954 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3955 }
3956 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3957 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3958 }
3959
3960 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3961
3962 static void
3963 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3964 {
3965 rtx fnaddr, mem, a_tramp;
3966
3967 emit_block_move (m_tramp, assemble_trampoline_template (),
3968 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3969
3970 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3971 emit_move_insn (mem, chain_value);
3972
3973 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3974 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3975 emit_move_insn (mem, fnaddr);
3976
3977 a_tramp = XEXP (m_tramp, 0);
3978 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3979 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3980 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3981 }
3982
3983 /* Thumb trampolines should be entered in thumb mode, so set
3984 the bottom bit of the address. */
3985
3986 static rtx
3987 arm_trampoline_adjust_address (rtx addr)
3988 {
3989 if (TARGET_THUMB)
3990 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3991 NULL, 0, OPTAB_LIB_WIDEN);
3992 return addr;
3993 }
3994 \f
3995 /* Return 1 if it is possible to return using a single instruction.
3996 If SIBLING is non-null, this is a test for a return before a sibling
3997 call. SIBLING is the call insn, so we can examine its register usage. */
3998
3999 int
4000 use_return_insn (int iscond, rtx sibling)
4001 {
4002 int regno;
4003 unsigned int func_type;
4004 unsigned long saved_int_regs;
4005 unsigned HOST_WIDE_INT stack_adjust;
4006 arm_stack_offsets *offsets;
4007
4008 /* Never use a return instruction before reload has run. */
4009 if (!reload_completed)
4010 return 0;
4011
4012 func_type = arm_current_func_type ();
4013
4014 /* Naked, volatile and stack alignment functions need special
4015 consideration. */
4016 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4017 return 0;
4018
4019 /* So do interrupt functions that use the frame pointer and Thumb
4020 interrupt functions. */
4021 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4022 return 0;
4023
4024 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4025 && !optimize_function_for_size_p (cfun))
4026 return 0;
4027
4028 offsets = arm_get_frame_offsets ();
4029 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4030
4031 /* As do variadic functions. */
4032 if (crtl->args.pretend_args_size
4033 || cfun->machine->uses_anonymous_args
4034 /* Or if the function calls __builtin_eh_return () */
4035 || crtl->calls_eh_return
4036 /* Or if the function calls alloca */
4037 || cfun->calls_alloca
4038 /* Or if there is a stack adjustment. However, if the stack pointer
4039 is saved on the stack, we can use a pre-incrementing stack load. */
4040 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4041 && stack_adjust == 4))
4042 /* Or if the static chain register was saved above the frame, under the
4043 assumption that the stack pointer isn't saved on the stack. */
4044 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4045 && arm_compute_static_chain_stack_bytes() != 0))
4046 return 0;
4047
4048 saved_int_regs = offsets->saved_regs_mask;
4049
4050 /* Unfortunately, the insn
4051
4052 ldmib sp, {..., sp, ...}
4053
4054 triggers a bug on most SA-110 based devices, such that the stack
4055 pointer won't be correctly restored if the instruction takes a
4056 page fault. We work around this problem by popping r3 along with
4057 the other registers, since that is never slower than executing
4058 another instruction.
4059
4060 We test for !arm_arch5 here, because code for any architecture
4061 less than this could potentially be run on one of the buggy
4062 chips. */
4063 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4064 {
4065 /* Validate that r3 is a call-clobbered register (always true in
4066 the default abi) ... */
4067 if (!call_used_regs[3])
4068 return 0;
4069
4070 /* ... that it isn't being used for a return value ... */
4071 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4072 return 0;
4073
4074 /* ... or for a tail-call argument ... */
4075 if (sibling)
4076 {
4077 gcc_assert (CALL_P (sibling));
4078
4079 if (find_regno_fusage (sibling, USE, 3))
4080 return 0;
4081 }
4082
4083 /* ... and that there are no call-saved registers in r0-r2
4084 (always true in the default ABI). */
4085 if (saved_int_regs & 0x7)
4086 return 0;
4087 }
4088
4089 /* Can't be done if interworking with Thumb, and any registers have been
4090 stacked. */
4091 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4092 return 0;
4093
4094 /* On StrongARM, conditional returns are expensive if they aren't
4095 taken and multiple registers have been stacked. */
4096 if (iscond && arm_tune_strongarm)
4097 {
4098 /* Conditional return when just the LR is stored is a simple
4099 conditional-load instruction, that's not expensive. */
4100 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4101 return 0;
4102
4103 if (flag_pic
4104 && arm_pic_register != INVALID_REGNUM
4105 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4106 return 0;
4107 }
4108
4109 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4110 several instructions if anything needs to be popped. */
4111 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4112 return 0;
4113
4114 /* If there are saved registers but the LR isn't saved, then we need
4115 two instructions for the return. */
4116 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4117 return 0;
4118
4119 /* Can't be done if any of the VFP regs are pushed,
4120 since this also requires an insn. */
4121 if (TARGET_HARD_FLOAT)
4122 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4123 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4124 return 0;
4125
4126 if (TARGET_REALLY_IWMMXT)
4127 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4128 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4129 return 0;
4130
4131 return 1;
4132 }
4133
4134 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4135 shrink-wrapping if possible. This is the case if we need to emit a
4136 prologue, which we can test by looking at the offsets. */
4137 bool
4138 use_simple_return_p (void)
4139 {
4140 arm_stack_offsets *offsets;
4141
4142 /* Note this function can be called before or after reload. */
4143 if (!reload_completed)
4144 arm_compute_frame_layout ();
4145
4146 offsets = arm_get_frame_offsets ();
4147 return offsets->outgoing_args != 0;
4148 }
4149
4150 /* Return TRUE if int I is a valid immediate ARM constant. */
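/* Worked examples for ARM mode (illustrative only): 0x000000ff and
   0x00ff0000 are valid, each being an 8-bit value rotated by an even
   amount; 0xf000000f is valid because the rotation may wrap around bit
   31; 0x00000101 and 0xff0000ff are not, since their set bits do not
   fit in any such rotated 8-bit window.  */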
4151
4152 int
4153 const_ok_for_arm (HOST_WIDE_INT i)
4154 {
4155 int lowbit;
4156
4157 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4158 be all zero, or all one. */
4159 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4160 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4161 != ((~(unsigned HOST_WIDE_INT) 0)
4162 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4163 return FALSE;
4164
4165 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4166
4167 /* Fast return for 0 and small values. We must do this for zero, since
4168 the code below can't handle that one case. */
4169 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4170 return TRUE;
4171
4172 /* Get the number of trailing zeros. */
4173 lowbit = ffs((int) i) - 1;
4174
4175 /* Only even shifts are allowed in ARM mode so round down to the
4176 nearest even number. */
4177 if (TARGET_ARM)
4178 lowbit &= ~1;
4179
4180 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4181 return TRUE;
4182
4183 if (TARGET_ARM)
4184 {
4185 /* Allow rotated constants in ARM mode. */
4186 if (lowbit <= 4
4187 && ((i & ~0xc000003f) == 0
4188 || (i & ~0xf000000f) == 0
4189 || (i & ~0xfc000003) == 0))
4190 return TRUE;
4191 }
4192 else if (TARGET_THUMB2)
4193 {
4194 HOST_WIDE_INT v;
4195
4196 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4197 v = i & 0xff;
4198 v |= v << 16;
4199 if (i == v || i == (v | (v << 8)))
4200 return TRUE;
4201
4202 /* Allow repeated pattern 0xXY00XY00. */
4203 v = i & 0xff00;
4204 v |= v << 16;
4205 if (i == v)
4206 return TRUE;
4207 }
4208 else if (TARGET_HAVE_MOVT)
4209 {
4210 /* Thumb-1 Targets with MOVT. */
4211 if (i > 0xffff)
4212 return FALSE;
4213 else
4214 return TRUE;
4215 }
4216
4217 return FALSE;
4218 }
4219
4220 /* Return true if I is a valid constant for the operation CODE. */
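/* Illustrative cases (not exhaustive): for CODE == SET, 0xffffff00 is
   accepted because its complement 0x000000ff can be loaded with MVN;
   for CODE == PLUS, -2 is accepted because the addition can be recast
   as subtracting 2.  */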
4221 int
4222 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4223 {
4224 if (const_ok_for_arm (i))
4225 return 1;
4226
4227 switch (code)
4228 {
4229 case SET:
4230 /* See if we can use movw. */
4231 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4232 return 1;
4233 else
4234 /* Otherwise, try mvn. */
4235 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4236
4237 case PLUS:
4238 /* See if we can use addw or subw. */
4239 if (TARGET_THUMB2
4240 && ((i & 0xfffff000) == 0
4241 || ((-i) & 0xfffff000) == 0))
4242 return 1;
4243 /* Fall through. */
4244 case COMPARE:
4245 case EQ:
4246 case NE:
4247 case GT:
4248 case LE:
4249 case LT:
4250 case GE:
4251 case GEU:
4252 case LTU:
4253 case GTU:
4254 case LEU:
4255 case UNORDERED:
4256 case ORDERED:
4257 case UNEQ:
4258 case UNGE:
4259 case UNLT:
4260 case UNGT:
4261 case UNLE:
4262 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4263
4264 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4265 case XOR:
4266 return 0;
4267
4268 case IOR:
4269 if (TARGET_THUMB2)
4270 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4271 return 0;
4272
4273 case AND:
4274 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4275
4276 default:
4277 gcc_unreachable ();
4278 }
4279 }
4280
4281 /* Return true if I is a valid di mode constant for the operation CODE. */
4282 int
4283 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4284 {
4285 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4286 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4287 rtx hi = GEN_INT (hi_val);
4288 rtx lo = GEN_INT (lo_val);
4289
4290 if (TARGET_THUMB1)
4291 return 0;
4292
4293 switch (code)
4294 {
4295 case AND:
4296 case IOR:
4297 case XOR:
4298 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4299 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4300 case PLUS:
4301 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4302
4303 default:
4304 return 0;
4305 }
4306 }
4307
4308 /* Emit a sequence of insns to handle a large constant.
4309 CODE is the code of the operation required, it can be any of SET, PLUS,
4310 IOR, AND, XOR, MINUS;
4311 MODE is the mode in which the operation is being performed;
4312 VAL is the integer to operate on;
4313 SOURCE is the other operand (a register, or a null-pointer for SET);
4314 SUBTARGETS means it is safe to create scratch registers if that will
4315 either produce a simpler sequence, or we will want to cse the values.
4316 Return value is the number of insns emitted. */
4317
4318 /* ??? Tweak this for thumb2. */
4319 int
4320 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4321 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4322 {
4323 rtx cond;
4324
4325 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4326 cond = COND_EXEC_TEST (PATTERN (insn));
4327 else
4328 cond = NULL_RTX;
4329
4330 if (subtargets || code == SET
4331 || (REG_P (target) && REG_P (source)
4332 && REGNO (target) != REGNO (source)))
4333 {
4334 /* After arm_reorg has been called, we can't fix up expensive
4335 constants by pushing them into memory so we must synthesize
4336 them in-line, regardless of the cost. This is only likely to
4337 be more costly on chips that have load delay slots and we are
4338 compiling without running the scheduler (so no splitting
4339 occurred before the final instruction emission).
4340
4341 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4342 */
4343 if (!cfun->machine->after_arm_reorg
4344 && !cond
4345 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4346 1, 0)
4347 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4348 + (code != SET))))
4349 {
4350 if (code == SET)
4351 {
4352 /* Currently SET is the only monadic value for CODE; all
4353 the rest are dyadic. */
4354 if (TARGET_USE_MOVT)
4355 arm_emit_movpair (target, GEN_INT (val));
4356 else
4357 emit_set_insn (target, GEN_INT (val));
4358
4359 return 1;
4360 }
4361 else
4362 {
4363 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4364
4365 if (TARGET_USE_MOVT)
4366 arm_emit_movpair (temp, GEN_INT (val));
4367 else
4368 emit_set_insn (temp, GEN_INT (val));
4369
4370 /* For MINUS, the value is the minuend (VAL - source), since we
4371 never have subtraction of a constant. */
4372 if (code == MINUS)
4373 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4374 else
4375 emit_set_insn (target,
4376 gen_rtx_fmt_ee (code, mode, source, temp));
4377 return 2;
4378 }
4379 }
4380 }
4381
4382 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4383 1);
4384 }
4385
4386 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4387 ARM/THUMB2 immediates and add up to VAL.
4388 The function return value gives the number of insns required. */
4389 static int
4390 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4391 struct four_ints *return_sequence)
4392 {
4393 int best_consecutive_zeros = 0;
4394 int i;
4395 int best_start = 0;
4396 int insns1, insns2;
4397 struct four_ints tmp_sequence;
4398
4399 /* If we aren't targeting ARM, the best place to start is always at
4400 the bottom; otherwise look more closely. */
4401 if (TARGET_ARM)
4402 {
4403 for (i = 0; i < 32; i += 2)
4404 {
4405 int consecutive_zeros = 0;
4406
4407 if (!(val & (3 << i)))
4408 {
4409 while ((i < 32) && !(val & (3 << i)))
4410 {
4411 consecutive_zeros += 2;
4412 i += 2;
4413 }
4414 if (consecutive_zeros > best_consecutive_zeros)
4415 {
4416 best_consecutive_zeros = consecutive_zeros;
4417 best_start = i - consecutive_zeros;
4418 }
4419 i -= 2;
4420 }
4421 }
4422 }
4423
4424 /* So long as it won't require any more insns to do so, it's
4425 desirable to emit a small constant (in bits 0...9) in the last
4426 insn. This way there is more chance that it can be combined with
4427 a later addressing insn to form a pre-indexed load or store
4428 operation. Consider:
4429
4430 *((volatile int *)0xe0000100) = 1;
4431 *((volatile int *)0xe0000110) = 2;
4432
4433 We want this to wind up as:
4434
4435 mov rA, #0xe0000000
4436 mov rB, #1
4437 str rB, [rA, #0x100]
4438 mov rB, #2
4439 str rB, [rA, #0x110]
4440
4441 rather than having to synthesize both large constants from scratch.
4442
4443 Therefore, we calculate how many insns would be required to emit
4444 the constant starting from `best_start', and also starting from
4445 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4446 yield a shorter sequence, we may as well use zero. */
4447 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4448 if (best_start != 0
4449 && ((HOST_WIDE_INT_1U << best_start) < val))
4450 {
4451 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4452 if (insns2 <= insns1)
4453 {
4454 *return_sequence = tmp_sequence;
4455 insns1 = insns2;
4456 }
4457 }
4458
4459 return insns1;
4460 }
4461
4462 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4463 static int
4464 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4465 struct four_ints *return_sequence, int i)
4466 {
4467 int remainder = val & 0xffffffff;
4468 int insns = 0;
4469
4470 /* Try and find a way of doing the job in either two or three
4471 instructions.
4472
4473 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4474 location. We start at position I. This may be the MSB, or
4475 optimal_immediate_sequence may have positioned it at the largest block
4476 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4477 wrapping around to the top of the word when we drop off the bottom.
4478 In the worst case this code should produce no more than four insns.
4479
4480 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4481 constants, shifted to any arbitrary location. We should always start
4482 at the MSB. */
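/* A small worked example (illustrative, ARM mode): val = 0x00ff0ff0 is
   not a single rotated 8-bit immediate, but it splits into 0x00ff0000
   and 0x00000ff0, each of which is, so a two-instruction sequence such
   as mov/orr suffices.  */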
4483 do
4484 {
4485 int end;
4486 unsigned int b1, b2, b3, b4;
4487 unsigned HOST_WIDE_INT result;
4488 int loc;
4489
4490 gcc_assert (insns < 4);
4491
4492 if (i <= 0)
4493 i += 32;
4494
4495 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4496 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4497 {
4498 loc = i;
4499 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4500 /* We can use addw/subw for the last 12 bits. */
4501 result = remainder;
4502 else
4503 {
4504 /* Use an 8-bit shifted/rotated immediate. */
4505 end = i - 8;
4506 if (end < 0)
4507 end += 32;
4508 result = remainder & ((0x0ff << end)
4509 | ((i < end) ? (0xff >> (32 - end))
4510 : 0));
4511 i -= 8;
4512 }
4513 }
4514 else
4515 {
4516 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4517 arbitrary shifts. */
4518 i -= TARGET_ARM ? 2 : 1;
4519 continue;
4520 }
4521
4522 /* Next, see if we can do a better job with a thumb2 replicated
4523 constant.
4524
4525 We do it this way around to catch the cases like 0x01F001E0 where
4526 two 8-bit immediates would work, but a replicated constant would
4527 make it worse.
4528
4529 TODO: 16-bit constants that don't clear all the bits, but still win.
4530 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4531 if (TARGET_THUMB2)
4532 {
4533 b1 = (remainder & 0xff000000) >> 24;
4534 b2 = (remainder & 0x00ff0000) >> 16;
4535 b3 = (remainder & 0x0000ff00) >> 8;
4536 b4 = remainder & 0xff;
4537
4538 if (loc > 24)
4539 {
4540 /* The 8-bit immediate already found clears b1 (and maybe b2),
4541 but must leave b3 and b4 alone. */
4542
4543 /* First try to find a 32-bit replicated constant that clears
4544 almost everything. We can assume that we can't do it in one,
4545 or else we wouldn't be here. */
4546 unsigned int tmp = b1 & b2 & b3 & b4;
4547 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4548 + (tmp << 24);
4549 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4550 + (tmp == b3) + (tmp == b4);
4551 if (tmp
4552 && (matching_bytes >= 3
4553 || (matching_bytes == 2
4554 && const_ok_for_op (remainder & ~tmp2, code))))
4555 {
4556 /* At least 3 of the bytes match, and the fourth has at
4557 least as many bits set, or two of the bytes match
4558 and it will only require one more insn to finish. */
4559 result = tmp2;
4560 i = tmp != b1 ? 32
4561 : tmp != b2 ? 24
4562 : tmp != b3 ? 16
4563 : 8;
4564 }
4565
4566 /* Second, try to find a 16-bit replicated constant that can
4567 leave three of the bytes clear. If b2 or b4 is already
4568 zero, then we can. If the 8-bit from above would not
4569 clear b2 anyway, then we still win. */
4570 else if (b1 == b3 && (!b2 || !b4
4571 || (remainder & 0x00ff0000 & ~result)))
4572 {
4573 result = remainder & 0xff00ff00;
4574 i = 24;
4575 }
4576 }
4577 else if (loc > 16)
4578 {
4579 /* The 8-bit immediate already found clears b2 (and maybe b3)
4580 and we don't get here unless b1 is already clear, but it will
4581 leave b4 unchanged. */
4582
4583 /* If we can clear b2 and b4 at once, then we win, since the
4584 8-bits couldn't possibly reach that far. */
4585 if (b2 == b4)
4586 {
4587 result = remainder & 0x00ff00ff;
4588 i = 16;
4589 }
4590 }
4591 }
4592
4593 return_sequence->i[insns++] = result;
4594 remainder &= ~result;
4595
4596 if (code == SET || code == MINUS)
4597 code = PLUS;
4598 }
4599 while (remainder);
4600
4601 return insns;
4602 }
4603
4604 /* Emit an instruction with the indicated PATTERN. If COND is
4605 non-NULL, conditionalize the execution of the instruction on COND
4606 being true. */
4607
4608 static void
4609 emit_constant_insn (rtx cond, rtx pattern)
4610 {
4611 if (cond)
4612 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4613 emit_insn (pattern);
4614 }
4615
4616 /* As above, but extra parameter GENERATE which, if clear, suppresses
4617 RTL generation. */
4618
4619 static int
4620 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4621 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4622 int subtargets, int generate)
4623 {
4624 int can_invert = 0;
4625 int can_negate = 0;
4626 int final_invert = 0;
4627 int i;
4628 int set_sign_bit_copies = 0;
4629 int clear_sign_bit_copies = 0;
4630 int clear_zero_bit_copies = 0;
4631 int set_zero_bit_copies = 0;
4632 int insns = 0, neg_insns, inv_insns;
4633 unsigned HOST_WIDE_INT temp1, temp2;
4634 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4635 struct four_ints *immediates;
4636 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4637
4638 /* Find out which operations are safe for a given CODE. Also do a quick
4639 check for degenerate cases; these can occur when DImode operations
4640 are split. */
4641 switch (code)
4642 {
4643 case SET:
4644 can_invert = 1;
4645 break;
4646
4647 case PLUS:
4648 can_negate = 1;
4649 break;
4650
4651 case IOR:
4652 if (remainder == 0xffffffff)
4653 {
4654 if (generate)
4655 emit_constant_insn (cond,
4656 gen_rtx_SET (target,
4657 GEN_INT (ARM_SIGN_EXTEND (val))));
4658 return 1;
4659 }
4660
4661 if (remainder == 0)
4662 {
4663 if (reload_completed && rtx_equal_p (target, source))
4664 return 0;
4665
4666 if (generate)
4667 emit_constant_insn (cond, gen_rtx_SET (target, source));
4668 return 1;
4669 }
4670 break;
4671
4672 case AND:
4673 if (remainder == 0)
4674 {
4675 if (generate)
4676 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4677 return 1;
4678 }
4679 if (remainder == 0xffffffff)
4680 {
4681 if (reload_completed && rtx_equal_p (target, source))
4682 return 0;
4683 if (generate)
4684 emit_constant_insn (cond, gen_rtx_SET (target, source));
4685 return 1;
4686 }
4687 can_invert = 1;
4688 break;
4689
4690 case XOR:
4691 if (remainder == 0)
4692 {
4693 if (reload_completed && rtx_equal_p (target, source))
4694 return 0;
4695 if (generate)
4696 emit_constant_insn (cond, gen_rtx_SET (target, source));
4697 return 1;
4698 }
4699
4700 if (remainder == 0xffffffff)
4701 {
4702 if (generate)
4703 emit_constant_insn (cond,
4704 gen_rtx_SET (target,
4705 gen_rtx_NOT (mode, source)));
4706 return 1;
4707 }
4708 final_invert = 1;
4709 break;
4710
4711 case MINUS:
4712 /* We treat MINUS as (val - source), since (source - val) is always
4713 passed as (source + (-val)). */
4714 if (remainder == 0)
4715 {
4716 if (generate)
4717 emit_constant_insn (cond,
4718 gen_rtx_SET (target,
4719 gen_rtx_NEG (mode, source)));
4720 return 1;
4721 }
4722 if (const_ok_for_arm (val))
4723 {
4724 if (generate)
4725 emit_constant_insn (cond,
4726 gen_rtx_SET (target,
4727 gen_rtx_MINUS (mode, GEN_INT (val),
4728 source)));
4729 return 1;
4730 }
4731
4732 break;
4733
4734 default:
4735 gcc_unreachable ();
4736 }
4737
4738 /* If we can do it in one insn get out quickly. */
4739 if (const_ok_for_op (val, code))
4740 {
4741 if (generate)
4742 emit_constant_insn (cond,
4743 gen_rtx_SET (target,
4744 (source
4745 ? gen_rtx_fmt_ee (code, mode, source,
4746 GEN_INT (val))
4747 : GEN_INT (val))));
4748 return 1;
4749 }
4750
4751 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4752 insn. */
4753 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4754 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4755 {
4756 if (generate)
4757 {
4758 if (mode == SImode && i == 16)
4759 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4760 smaller insn. */
4761 emit_constant_insn (cond,
4762 gen_zero_extendhisi2
4763 (target, gen_lowpart (HImode, source)));
4764 else
4765 /* Extz only supports SImode, but we can coerce the operands
4766 into that mode. */
4767 emit_constant_insn (cond,
4768 gen_extzv_t2 (gen_lowpart (SImode, target),
4769 gen_lowpart (SImode, source),
4770 GEN_INT (i), const0_rtx));
4771 }
4772
4773 return 1;
4774 }
4775
4776 /* Calculate a few attributes that may be useful for specific
4777 optimizations. */
4778 /* Count number of leading zeros. */
4779 for (i = 31; i >= 0; i--)
4780 {
4781 if ((remainder & (1 << i)) == 0)
4782 clear_sign_bit_copies++;
4783 else
4784 break;
4785 }
4786
4787 /* Count number of leading 1's. */
4788 for (i = 31; i >= 0; i--)
4789 {
4790 if ((remainder & (1 << i)) != 0)
4791 set_sign_bit_copies++;
4792 else
4793 break;
4794 }
4795
4796 /* Count number of trailing zeros. */
4797 for (i = 0; i <= 31; i++)
4798 {
4799 if ((remainder & (1 << i)) == 0)
4800 clear_zero_bit_copies++;
4801 else
4802 break;
4803 }
4804
4805 /* Count number of trailing 1's. */
4806 for (i = 0; i <= 31; i++)
4807 {
4808 if ((remainder & (1 << i)) != 0)
4809 set_zero_bit_copies++;
4810 else
4811 break;
4812 }
4813
4814 switch (code)
4815 {
4816 case SET:
4817 /* See if we can do this by sign_extending a constant that is known
4818 to be negative. This is a good way of doing it, since the shift
4819 may well merge into a subsequent insn. */
4820 if (set_sign_bit_copies > 1)
4821 {
4822 if (const_ok_for_arm
4823 (temp1 = ARM_SIGN_EXTEND (remainder
4824 << (set_sign_bit_copies - 1))))
4825 {
4826 if (generate)
4827 {
4828 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4829 emit_constant_insn (cond,
4830 gen_rtx_SET (new_src, GEN_INT (temp1)));
4831 emit_constant_insn (cond,
4832 gen_ashrsi3 (target, new_src,
4833 GEN_INT (set_sign_bit_copies - 1)));
4834 }
4835 return 2;
4836 }
4837 /* For an inverted constant, we will need to set the low bits,
4838 these will be shifted out of harm's way. */
4839 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4840 if (const_ok_for_arm (~temp1))
4841 {
4842 if (generate)
4843 {
4844 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4845 emit_constant_insn (cond,
4846 gen_rtx_SET (new_src, GEN_INT (temp1)));
4847 emit_constant_insn (cond,
4848 gen_ashrsi3 (target, new_src,
4849 GEN_INT (set_sign_bit_copies - 1)));
4850 }
4851 return 2;
4852 }
4853 }
4854
4855 /* See if we can calculate the value as the difference between two
4856 valid immediates. */
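/* Worked example (illustrative, assuming a core without MOVT):
   remainder = 0x00ffff00 gives topshift = 8; temp1 rounds up to
   0x01000000 via the overflow case below and temp2 becomes 0x100, both
   valid immediates, so the value is built as 0x01000000 - 0x100
   (a mov followed by a sub).  */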
4857 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4858 {
4859 int topshift = clear_sign_bit_copies & ~1;
4860
4861 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4862 & (0xff000000 >> topshift));
4863
4864 /* If temp1 is zero, then that means the 9 most significant
4865 bits of remainder were 1 and we've caused it to overflow.
4866 When topshift is 0 we don't need to do anything since we
4867 can borrow from 'bit 32'. */
4868 if (temp1 == 0 && topshift != 0)
4869 temp1 = 0x80000000 >> (topshift - 1);
4870
4871 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4872
4873 if (const_ok_for_arm (temp2))
4874 {
4875 if (generate)
4876 {
4877 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4878 emit_constant_insn (cond,
4879 gen_rtx_SET (new_src, GEN_INT (temp1)));
4880 emit_constant_insn (cond,
4881 gen_addsi3 (target, new_src,
4882 GEN_INT (-temp2)));
4883 }
4884
4885 return 2;
4886 }
4887 }
4888
4889 /* See if we can generate this by setting the bottom (or the top)
4890 16 bits, and then shifting these into the other half of the
4891 word. We only look for the simplest cases, to do more would cost
4892 too much. Be careful, however, not to generate this when the
4893 alternative would take fewer insns. */
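/* Worked example (illustrative): remainder = 0x01230123 has
   temp2 = 0x123, which is not a valid immediate on its own, so it is
   synthesized first and then ORred with itself shifted left by 16 to
   form the full constant.  */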
4894 if (val & 0xffff0000)
4895 {
4896 temp1 = remainder & 0xffff0000;
4897 temp2 = remainder & 0x0000ffff;
4898
4899 /* Overlaps outside this range are best done using other methods. */
4900 for (i = 9; i < 24; i++)
4901 {
4902 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4903 && !const_ok_for_arm (temp2))
4904 {
4905 rtx new_src = (subtargets
4906 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4907 : target);
4908 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4909 source, subtargets, generate);
4910 source = new_src;
4911 if (generate)
4912 emit_constant_insn
4913 (cond,
4914 gen_rtx_SET
4915 (target,
4916 gen_rtx_IOR (mode,
4917 gen_rtx_ASHIFT (mode, source,
4918 GEN_INT (i)),
4919 source)));
4920 return insns + 1;
4921 }
4922 }
4923
4924 /* Don't duplicate cases already considered. */
4925 for (i = 17; i < 24; i++)
4926 {
4927 if (((temp1 | (temp1 >> i)) == remainder)
4928 && !const_ok_for_arm (temp1))
4929 {
4930 rtx new_src = (subtargets
4931 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4932 : target);
4933 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4934 source, subtargets, generate);
4935 source = new_src;
4936 if (generate)
4937 emit_constant_insn
4938 (cond,
4939 gen_rtx_SET (target,
4940 gen_rtx_IOR
4941 (mode,
4942 gen_rtx_LSHIFTRT (mode, source,
4943 GEN_INT (i)),
4944 source)));
4945 return insns + 1;
4946 }
4947 }
4948 }
4949 break;
4950
4951 case IOR:
4952 case XOR:
4953 /* If we have IOR or XOR, and the constant can be loaded in a
4954 single instruction, and we can find a temporary to put it in,
4955 then this can be done in two instructions instead of 3-4. */
4956 if (subtargets
4957 /* TARGET can't be NULL if SUBTARGETS is 0.  */
4958 || (reload_completed && !reg_mentioned_p (target, source)))
4959 {
4960 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4961 {
4962 if (generate)
4963 {
4964 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4965
4966 emit_constant_insn (cond,
4967 gen_rtx_SET (sub, GEN_INT (val)));
4968 emit_constant_insn (cond,
4969 gen_rtx_SET (target,
4970 gen_rtx_fmt_ee (code, mode,
4971 source, sub)));
4972 }
4973 return 2;
4974 }
4975 }
4976
4977 if (code == XOR)
4978 break;
4979
4980 /* Convert.
4981 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4982 with the remaining bits 0, e.g. 0xfff00000)
4983 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4984
4985 This can be done in 2 instructions by using shifts with mov or mvn.
4986 e.g. for
4987 x = x | 0xfff00000;
4988 we generate.
4989 mvn r0, r0, asl #12
4990 mvn r0, r0, lsr #12 */
4991 if (set_sign_bit_copies > 8
4992 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4993 {
4994 if (generate)
4995 {
4996 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4997 rtx shift = GEN_INT (set_sign_bit_copies);
4998
4999 emit_constant_insn
5000 (cond,
5001 gen_rtx_SET (sub,
5002 gen_rtx_NOT (mode,
5003 gen_rtx_ASHIFT (mode,
5004 source,
5005 shift))));
5006 emit_constant_insn
5007 (cond,
5008 gen_rtx_SET (target,
5009 gen_rtx_NOT (mode,
5010 gen_rtx_LSHIFTRT (mode, sub,
5011 shift))));
5012 }
5013 return 2;
5014 }
5015
5016 /* Convert
5017 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5018 to
5019 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5020
5021 E.g. for r0 = r0 | 0xfff we generate
5022 mvn r0, r0, lsr #12
5023 mvn r0, r0, asl #12
5024
5025 */
5026 if (set_zero_bit_copies > 8
5027 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5028 {
5029 if (generate)
5030 {
5031 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5032 rtx shift = GEN_INT (set_zero_bit_copies);
5033
5034 emit_constant_insn
5035 (cond,
5036 gen_rtx_SET (sub,
5037 gen_rtx_NOT (mode,
5038 gen_rtx_LSHIFTRT (mode,
5039 source,
5040 shift))));
5041 emit_constant_insn
5042 (cond,
5043 gen_rtx_SET (target,
5044 gen_rtx_NOT (mode,
5045 gen_rtx_ASHIFT (mode, sub,
5046 shift))));
5047 }
5048 return 2;
5049 }
5050
5051 /* This will never be reached for Thumb2 because orn is a valid
5052 instruction. This is for Thumb1 and the ARM 32 bit cases.
5053
5054 x = y | constant (such that ~constant is a valid constant)
5055 Transform this to
5056 x = ~(~y & ~constant).
5057 */
5058 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5059 {
5060 if (generate)
5061 {
5062 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5063 emit_constant_insn (cond,
5064 gen_rtx_SET (sub,
5065 gen_rtx_NOT (mode, source)));
5066 source = sub;
5067 if (subtargets)
5068 sub = gen_reg_rtx (mode);
5069 emit_constant_insn (cond,
5070 gen_rtx_SET (sub,
5071 gen_rtx_AND (mode, source,
5072 GEN_INT (temp1))));
5073 emit_constant_insn (cond,
5074 gen_rtx_SET (target,
5075 gen_rtx_NOT (mode, sub)));
5076 }
5077 return 3;
5078 }
5079 break;
5080
5081 case AND:
5082 /* See if two shifts will do 2 or more insns' worth of work. */
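/* Editorial illustration (hypothetical input): for a mask such as
   0x0000ff0f (16 leading zero bits, low half not a valid immediate) the
   code below first clears the low bits that the shifts will not remove
   (via a recursive AND with remainder | shift_mask) and then clears the
   top half with an lsl #16 / lsr #16 pair.  */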
5083 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5084 {
5085 HOST_WIDE_INT shift_mask = ((0xffffffff
5086 << (32 - clear_sign_bit_copies))
5087 & 0xffffffff);
5088
5089 if ((remainder | shift_mask) != 0xffffffff)
5090 {
5091 HOST_WIDE_INT new_val
5092 = ARM_SIGN_EXTEND (remainder | shift_mask);
5093
5094 if (generate)
5095 {
5096 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5097 insns = arm_gen_constant (AND, SImode, cond, new_val,
5098 new_src, source, subtargets, 1);
5099 source = new_src;
5100 }
5101 else
5102 {
5103 rtx targ = subtargets ? NULL_RTX : target;
5104 insns = arm_gen_constant (AND, mode, cond, new_val,
5105 targ, source, subtargets, 0);
5106 }
5107 }
5108
5109 if (generate)
5110 {
5111 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5112 rtx shift = GEN_INT (clear_sign_bit_copies);
5113
5114 emit_insn (gen_ashlsi3 (new_src, source, shift));
5115 emit_insn (gen_lshrsi3 (target, new_src, shift));
5116 }
5117
5118 return insns + 2;
5119 }
5120
5121 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5122 {
5123 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5124
5125 if ((remainder | shift_mask) != 0xffffffff)
5126 {
5127 HOST_WIDE_INT new_val
5128 = ARM_SIGN_EXTEND (remainder | shift_mask);
5129 if (generate)
5130 {
5131 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5132
5133 insns = arm_gen_constant (AND, mode, cond, new_val,
5134 new_src, source, subtargets, 1);
5135 source = new_src;
5136 }
5137 else
5138 {
5139 rtx targ = subtargets ? NULL_RTX : target;
5140
5141 insns = arm_gen_constant (AND, mode, cond, new_val,
5142 targ, source, subtargets, 0);
5143 }
5144 }
5145
5146 if (generate)
5147 {
5148 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5149 rtx shift = GEN_INT (clear_zero_bit_copies);
5150
5151 emit_insn (gen_lshrsi3 (new_src, source, shift));
5152 emit_insn (gen_ashlsi3 (target, new_src, shift));
5153 }
5154
5155 return insns + 2;
5156 }
5157
5158 break;
5159
5160 default:
5161 break;
5162 }
5163
5164 /* Calculate what the instruction sequences would be if we generated it
5165 normally, negated, or inverted. */
5166 if (code == AND)
5167 /* AND cannot be split into multiple insns, so invert and use BIC. */
5168 insns = 99;
5169 else
5170 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5171
5172 if (can_negate)
5173 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5174 &neg_immediates);
5175 else
5176 neg_insns = 99;
5177
5178 if (can_invert || final_invert)
5179 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5180 &inv_immediates);
5181 else
5182 inv_insns = 99;
5183
5184 immediates = &pos_immediates;
5185
5186 /* Is the negated immediate sequence more efficient? */
5187 if (neg_insns < insns && neg_insns <= inv_insns)
5188 {
5189 insns = neg_insns;
5190 immediates = &neg_immediates;
5191 }
5192 else
5193 can_negate = 0;
5194
5195 /* Is the inverted immediate sequence more efficient?
5196 We must allow for an extra NOT instruction for XOR operations, although
5197 there is some chance that the final 'mvn' will get optimized later. */
5198 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5199 {
5200 insns = inv_insns;
5201 immediates = &inv_immediates;
5202 }
5203 else
5204 {
5205 can_invert = 0;
5206 final_invert = 0;
5207 }
5208
5209 /* Now output the chosen sequence as instructions. */
5210 if (generate)
5211 {
5212 for (i = 0; i < insns; i++)
5213 {
5214 rtx new_src, temp1_rtx;
5215
5216 temp1 = immediates->i[i];
5217
5218 if (code == SET || code == MINUS)
5219 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5220 else if ((final_invert || i < (insns - 1)) && subtargets)
5221 new_src = gen_reg_rtx (mode);
5222 else
5223 new_src = target;
5224
5225 if (can_invert)
5226 temp1 = ~temp1;
5227 else if (can_negate)
5228 temp1 = -temp1;
5229
5230 temp1 = trunc_int_for_mode (temp1, mode);
5231 temp1_rtx = GEN_INT (temp1);
5232
5233 if (code == SET)
5234 ;
5235 else if (code == MINUS)
5236 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5237 else
5238 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5239
5240 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5241 source = new_src;
5242
5243 if (code == SET)
5244 {
5245 can_negate = can_invert;
5246 can_invert = 0;
5247 code = PLUS;
5248 }
5249 else if (code == MINUS)
5250 code = PLUS;
5251 }
5252 }
5253
5254 if (final_invert)
5255 {
5256 if (generate)
5257 emit_constant_insn (cond, gen_rtx_SET (target,
5258 gen_rtx_NOT (mode, source)));
5259 insns++;
5260 }
5261
5262 return insns;
5263 }
5264
5265 /* Canonicalize a comparison so that we are more likely to recognize it.
5266 This can be done for a few constant compares, where we can make the
5267 immediate value easier to load. */
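/* Editorial illustration: 0xfff is not a valid ARM immediate but 0x1000
   is, so a comparison such as (GT reg (const_int 0xfff)) is rewritten
   below as (GE reg (const_int 0x1000)), avoiding an extra constant
   load.  */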
5268
5269 static void
5270 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5271 bool op0_preserve_value)
5272 {
5273 machine_mode mode;
5274 unsigned HOST_WIDE_INT i, maxval;
5275
5276 mode = GET_MODE (*op0);
5277 if (mode == VOIDmode)
5278 mode = GET_MODE (*op1);
5279
5280 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5281
5282 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5283 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5284 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5285 for GTU/LEU in Thumb mode. */
5286 if (mode == DImode)
5287 {
5288
5289 if (*code == GT || *code == LE
5290 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5291 {
5292 /* Missing comparison. First try to use an available
5293 comparison. */
5294 if (CONST_INT_P (*op1))
5295 {
5296 i = INTVAL (*op1);
5297 switch (*code)
5298 {
5299 case GT:
5300 case LE:
5301 if (i != maxval
5302 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5303 {
5304 *op1 = GEN_INT (i + 1);
5305 *code = *code == GT ? GE : LT;
5306 return;
5307 }
5308 break;
5309 case GTU:
5310 case LEU:
5311 if (i != ~((unsigned HOST_WIDE_INT) 0)
5312 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5313 {
5314 *op1 = GEN_INT (i + 1);
5315 *code = *code == GTU ? GEU : LTU;
5316 return;
5317 }
5318 break;
5319 default:
5320 gcc_unreachable ();
5321 }
5322 }
5323
5324 /* If that did not work, reverse the condition. */
5325 if (!op0_preserve_value)
5326 {
5327 std::swap (*op0, *op1);
5328 *code = (int)swap_condition ((enum rtx_code)*code);
5329 }
5330 }
5331 return;
5332 }
5333
5334 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5335 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5336 to facilitate possible combining with a cmp into 'ands'. */
5337 if (mode == SImode
5338 && GET_CODE (*op0) == ZERO_EXTEND
5339 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5340 && GET_MODE (XEXP (*op0, 0)) == QImode
5341 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5342 && subreg_lowpart_p (XEXP (*op0, 0))
5343 && *op1 == const0_rtx)
5344 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5345 GEN_INT (255));
5346
5347 /* Comparisons smaller than DImode. Only adjust comparisons against
5348 an out-of-range constant. */
5349 if (!CONST_INT_P (*op1)
5350 || const_ok_for_arm (INTVAL (*op1))
5351 || const_ok_for_arm (- INTVAL (*op1)))
5352 return;
5353
5354 i = INTVAL (*op1);
5355
5356 switch (*code)
5357 {
5358 case EQ:
5359 case NE:
5360 return;
5361
5362 case GT:
5363 case LE:
5364 if (i != maxval
5365 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5366 {
5367 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5368 *code = *code == GT ? GE : LT;
5369 return;
5370 }
5371 break;
5372
5373 case GE:
5374 case LT:
5375 if (i != ~maxval
5376 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5377 {
5378 *op1 = GEN_INT (i - 1);
5379 *code = *code == GE ? GT : LE;
5380 return;
5381 }
5382 break;
5383
5384 case GTU:
5385 case LEU:
5386 if (i != ~((unsigned HOST_WIDE_INT) 0)
5387 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5388 {
5389 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5390 *code = *code == GTU ? GEU : LTU;
5391 return;
5392 }
5393 break;
5394
5395 case GEU:
5396 case LTU:
5397 if (i != 0
5398 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5399 {
5400 *op1 = GEN_INT (i - 1);
5401 *code = *code == GEU ? GTU : LEU;
5402 return;
5403 }
5404 break;
5405
5406 default:
5407 gcc_unreachable ();
5408 }
5409 }
5410
5411
5412 /* Define how to find the value returned by a function. */
5413
5414 static rtx
5415 arm_function_value(const_tree type, const_tree func,
5416 bool outgoing ATTRIBUTE_UNUSED)
5417 {
5418 machine_mode mode;
5419 int unsignedp ATTRIBUTE_UNUSED;
5420 rtx r ATTRIBUTE_UNUSED;
5421
5422 mode = TYPE_MODE (type);
5423
5424 if (TARGET_AAPCS_BASED)
5425 return aapcs_allocate_return_reg (mode, type, func);
5426
5427 /* Promote integer types. */
5428 if (INTEGRAL_TYPE_P (type))
5429 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5430
5431 /* Promote small structs returned in a register to full-word size
5432 for big-endian AAPCS. */
5433 if (arm_return_in_msb (type))
5434 {
5435 HOST_WIDE_INT size = int_size_in_bytes (type);
5436 if (size % UNITS_PER_WORD != 0)
5437 {
5438 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5439 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5440 }
5441 }
5442
5443 return arm_libcall_value_1 (mode);
5444 }
5445
5446 /* libcall hashtable helpers. */
5447
5448 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5449 {
5450 static inline hashval_t hash (const rtx_def *);
5451 static inline bool equal (const rtx_def *, const rtx_def *);
5452 static inline void remove (rtx_def *);
5453 };
5454
5455 inline bool
5456 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5457 {
5458 return rtx_equal_p (p1, p2);
5459 }
5460
5461 inline hashval_t
5462 libcall_hasher::hash (const rtx_def *p1)
5463 {
5464 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5465 }
5466
5467 typedef hash_table<libcall_hasher> libcall_table_type;
5468
5469 static void
5470 add_libcall (libcall_table_type *htab, rtx libcall)
5471 {
5472 *htab->find_slot (libcall, INSERT) = libcall;
5473 }
5474
5475 static bool
5476 arm_libcall_uses_aapcs_base (const_rtx libcall)
5477 {
5478 static bool init_done = false;
5479 static libcall_table_type *libcall_htab = NULL;
5480
5481 if (!init_done)
5482 {
5483 init_done = true;
5484
5485 libcall_htab = new libcall_table_type (31);
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5492 add_libcall (libcall_htab,
5493 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5494
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5503
5504 add_libcall (libcall_htab,
5505 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5514 add_libcall (libcall_htab,
5515 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5516 add_libcall (libcall_htab,
5517 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5518 add_libcall (libcall_htab,
5519 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5520
5521 /* Values from double-precision helper functions are returned in core
5522 registers if the selected core only supports single-precision
5523 arithmetic, even if we are using the hard-float ABI. The same is
5524 true for single-precision helpers, but we will never be using the
5525 hard-float ABI on a CPU which doesn't support single-precision
5526 operations in hardware. */
5527 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5533 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5534 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5536 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5537 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5538 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5539 SFmode));
5540 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5541 DFmode));
5542 add_libcall (libcall_htab,
5543 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5544 }
5545
5546 return libcall && libcall_htab->find (libcall) != NULL;
5547 }
5548
5549 static rtx
5550 arm_libcall_value_1 (machine_mode mode)
5551 {
5552 if (TARGET_AAPCS_BASED)
5553 return aapcs_libcall_value (mode);
5554 else if (TARGET_IWMMXT_ABI
5555 && arm_vector_mode_supported_p (mode))
5556 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5557 else
5558 return gen_rtx_REG (mode, ARG_REGISTER (1));
5559 }
5560
5561 /* Define how to find the value returned by a library function
5562 assuming the value has mode MODE. */
5563
5564 static rtx
5565 arm_libcall_value (machine_mode mode, const_rtx libcall)
5566 {
5567 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5568 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5569 {
5570 /* The following libcalls return their result in integer registers,
5571 even though they return a floating point value. */
5572 if (arm_libcall_uses_aapcs_base (libcall))
5573 return gen_rtx_REG (mode, ARG_REGISTER(1));
5574
5575 }
5576
5577 return arm_libcall_value_1 (mode);
5578 }
5579
5580 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5581
5582 static bool
5583 arm_function_value_regno_p (const unsigned int regno)
5584 {
5585 if (regno == ARG_REGISTER (1)
5586 || (TARGET_32BIT
5587 && TARGET_AAPCS_BASED
5588 && TARGET_HARD_FLOAT
5589 && regno == FIRST_VFP_REGNUM)
5590 || (TARGET_IWMMXT_ABI
5591 && regno == FIRST_IWMMXT_REGNUM))
5592 return true;
5593
5594 return false;
5595 }
5596
5597 /* Determine the amount of memory needed to store the possible return
5598 registers of an untyped call. */
5599 int
5600 arm_apply_result_size (void)
5601 {
5602 int size = 16;
5603
5604 if (TARGET_32BIT)
5605 {
5606 if (TARGET_HARD_FLOAT_ABI)
5607 size += 32;
5608 if (TARGET_IWMMXT_ABI)
5609 size += 8;
5610 }
5611
5612 return size;
5613 }
5614
5615 /* Decide whether TYPE should be returned in memory (true)
5616 or in a register (false). FNTYPE is the type of the function making
5617 the call. */
5618 static bool
5619 arm_return_in_memory (const_tree type, const_tree fntype)
5620 {
5621 HOST_WIDE_INT size;
5622
5623 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5624
5625 if (TARGET_AAPCS_BASED)
5626 {
5627 /* Simple, non-aggregate types (i.e. not including vectors and
5628 complex) are always returned in a register (or registers).
5629 We don't care about which register here, so we can short-cut
5630 some of the detail. */
5631 if (!AGGREGATE_TYPE_P (type)
5632 && TREE_CODE (type) != VECTOR_TYPE
5633 && TREE_CODE (type) != COMPLEX_TYPE)
5634 return false;
5635
5636 /* Any return value that is no larger than one word can be
5637 returned in r0. */
5638 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5639 return false;
5640
5641 /* Check any available co-processors to see if they accept the
5642 type as a register candidate (VFP, for example, can return
5643 some aggregates in consecutive registers). These aren't
5644 available if the call is variadic. */
5645 if (aapcs_select_return_coproc (type, fntype) >= 0)
5646 return false;
5647
5648 /* Vector values should be returned using ARM registers, not
5649 memory (unless they're over 16 bytes, which will break since
5650 we only have four call-clobbered registers to play with). */
5651 if (TREE_CODE (type) == VECTOR_TYPE)
5652 return (size < 0 || size > (4 * UNITS_PER_WORD));
5653
5654 /* The rest go in memory. */
5655 return true;
5656 }
5657
5658 if (TREE_CODE (type) == VECTOR_TYPE)
5659 return (size < 0 || size > (4 * UNITS_PER_WORD));
5660
5661 if (!AGGREGATE_TYPE_P (type)
5662 && TREE_CODE (type) != VECTOR_TYPE)
5663 /* All simple types are returned in registers. */
5664 return false;
5665
5666 if (arm_abi != ARM_ABI_APCS)
5667 {
5668 /* ATPCS and later return aggregate types in memory only if they are
5669 larger than a word (or are variable size). */
5670 return (size < 0 || size > UNITS_PER_WORD);
5671 }
5672
5673 /* For the arm-wince targets we choose to be compatible with Microsoft's
5674 ARM and Thumb compilers, which always return aggregates in memory. */
5675 #ifndef ARM_WINCE
5676 /* All structures/unions bigger than one word are returned in memory.
5677 Also catch the case where int_size_in_bytes returns -1. In this case
5678 the aggregate is either huge or of variable size, and in either case
5679 we will want to return it via memory and not in a register. */
5680 if (size < 0 || size > UNITS_PER_WORD)
5681 return true;
5682
5683 if (TREE_CODE (type) == RECORD_TYPE)
5684 {
5685 tree field;
5686
5687 /* For a struct the APCS says that we only return in a register
5688 if the type is 'integer like' and every addressable element
5689 has an offset of zero. For practical purposes this means
5690 that the structure can have at most one non bit-field element
5691 and that this element must be the first one in the structure. */
5692
5693 /* Find the first field, ignoring non FIELD_DECL things which will
5694 have been created by C++. */
5695 for (field = TYPE_FIELDS (type);
5696 field && TREE_CODE (field) != FIELD_DECL;
5697 field = DECL_CHAIN (field))
5698 continue;
5699
5700 if (field == NULL)
5701 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5702
5703 /* Check that the first field is valid for returning in a register. */
5704
5705 /* ... Floats are not allowed.  */
5706 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5707 return true;
5708
5709 /* ... Aggregates that are not themselves valid for returning in
5710 a register are not allowed. */
5711 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5712 return true;
5713
5714 /* Now check the remaining fields, if any. Only bitfields are allowed,
5715 since they are not addressable. */
5716 for (field = DECL_CHAIN (field);
5717 field;
5718 field = DECL_CHAIN (field))
5719 {
5720 if (TREE_CODE (field) != FIELD_DECL)
5721 continue;
5722
5723 if (!DECL_BIT_FIELD_TYPE (field))
5724 return true;
5725 }
5726
5727 return false;
5728 }
5729
5730 if (TREE_CODE (type) == UNION_TYPE)
5731 {
5732 tree field;
5733
5734 /* Unions can be returned in registers if every element is
5735 integral, or can be returned in an integer register. */
5736 for (field = TYPE_FIELDS (type);
5737 field;
5738 field = DECL_CHAIN (field))
5739 {
5740 if (TREE_CODE (field) != FIELD_DECL)
5741 continue;
5742
5743 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5744 return true;
5745
5746 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5747 return true;
5748 }
5749
5750 return false;
5751 }
5752 #endif /* not ARM_WINCE */
5753
5754 /* Return all other types in memory. */
5755 return true;
5756 }
5757
5758 const struct pcs_attribute_arg
5759 {
5760 const char *arg;
5761 enum arm_pcs value;
5762 } pcs_attribute_args[] =
5763 {
5764 {"aapcs", ARM_PCS_AAPCS},
5765 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5766 #if 0
5767 /* We could recognize these, but changes would be needed elsewhere
5768 * to implement them. */
5769 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5770 {"atpcs", ARM_PCS_ATPCS},
5771 {"apcs", ARM_PCS_APCS},
5772 #endif
5773 {NULL, ARM_PCS_UNKNOWN}
5774 };
5775
5776 static enum arm_pcs
5777 arm_pcs_from_attribute (tree attr)
5778 {
5779 const struct pcs_attribute_arg *ptr;
5780 const char *arg;
5781
5782 /* Get the value of the argument. */
5783 if (TREE_VALUE (attr) == NULL_TREE
5784 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5785 return ARM_PCS_UNKNOWN;
5786
5787 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5788
5789 /* Check it against the list of known arguments. */
5790 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5791 if (streq (arg, ptr->arg))
5792 return ptr->value;
5793
5794 /* An unrecognized PCS name. */
5795 return ARM_PCS_UNKNOWN;
5796 }
5797
5798 /* Get the PCS variant to use for this call. TYPE is the function's type
5799 specification, DECL is the specific declaration. DECL may be null if
5800 the call could be indirect or if this is a library call. */
5801 static enum arm_pcs
5802 arm_get_pcs_model (const_tree type, const_tree decl)
5803 {
5804 bool user_convention = false;
5805 enum arm_pcs user_pcs = arm_pcs_default;
5806 tree attr;
5807
5808 gcc_assert (type);
5809
5810 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5811 if (attr)
5812 {
5813 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5814 user_convention = true;
5815 }
5816
5817 if (TARGET_AAPCS_BASED)
5818 {
5819 /* Detect varargs functions. These always use the base rules
5820 (no argument is ever a candidate for a co-processor
5821 register). */
5822 bool base_rules = stdarg_p (type);
5823
5824 if (user_convention)
5825 {
5826 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5827 sorry ("non-AAPCS derived PCS variant");
5828 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5829 error ("variadic functions must use the base AAPCS variant");
5830 }
5831
5832 if (base_rules)
5833 return ARM_PCS_AAPCS;
5834 else if (user_convention)
5835 return user_pcs;
5836 else if (decl && flag_unit_at_a_time)
5837 {
5838 /* Local functions never leak outside this compilation unit,
5839 so we are free to use whatever conventions are
5840 appropriate. */
5841 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5842 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5843 if (i && i->local)
5844 return ARM_PCS_AAPCS_LOCAL;
5845 }
5846 }
5847 else if (user_convention && user_pcs != arm_pcs_default)
5848 sorry ("PCS variant");
5849
5850 /* For everything else we use the target's default. */
5851 return arm_pcs_default;
5852 }
5853
5854
5855 static void
5856 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5857 const_tree fntype ATTRIBUTE_UNUSED,
5858 rtx libcall ATTRIBUTE_UNUSED,
5859 const_tree fndecl ATTRIBUTE_UNUSED)
5860 {
5861 /* Record the unallocated VFP registers. */
5862 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5863 pcum->aapcs_vfp_reg_alloc = 0;
5864 }
5865
5866 /* Walk down the type tree of TYPE counting consecutive base elements.
5867 If *MODEP is VOIDmode, then set it to the first valid floating point
5868 type. If a non-floating point type is found, or if a floating point
5869 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5870 otherwise return the count in the sub-tree. */
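/* Editorial illustration: for struct { double x; double y[2]; } the walk
   returns 3 with *MODEP set to DFmode (a homogeneous aggregate of three
   doubles); a struct mixing float and double members returns -1.  */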
5871 static int
5872 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5873 {
5874 machine_mode mode;
5875 HOST_WIDE_INT size;
5876
5877 switch (TREE_CODE (type))
5878 {
5879 case REAL_TYPE:
5880 mode = TYPE_MODE (type);
5881 if (mode != DFmode && mode != SFmode && mode != HFmode)
5882 return -1;
5883
5884 if (*modep == VOIDmode)
5885 *modep = mode;
5886
5887 if (*modep == mode)
5888 return 1;
5889
5890 break;
5891
5892 case COMPLEX_TYPE:
5893 mode = TYPE_MODE (TREE_TYPE (type));
5894 if (mode != DFmode && mode != SFmode)
5895 return -1;
5896
5897 if (*modep == VOIDmode)
5898 *modep = mode;
5899
5900 if (*modep == mode)
5901 return 2;
5902
5903 break;
5904
5905 case VECTOR_TYPE:
5906 /* Use V2SImode and V4SImode as representatives of all 64-bit
5907 and 128-bit vector types, whether or not those modes are
5908 supported with the present options. */
5909 size = int_size_in_bytes (type);
5910 switch (size)
5911 {
5912 case 8:
5913 mode = V2SImode;
5914 break;
5915 case 16:
5916 mode = V4SImode;
5917 break;
5918 default:
5919 return -1;
5920 }
5921
5922 if (*modep == VOIDmode)
5923 *modep = mode;
5924
5925 /* Vector modes are considered to be opaque: two vectors are
5926 equivalent for the purposes of being homogeneous aggregates
5927 if they are the same size. */
5928 if (*modep == mode)
5929 return 1;
5930
5931 break;
5932
5933 case ARRAY_TYPE:
5934 {
5935 int count;
5936 tree index = TYPE_DOMAIN (type);
5937
5938 /* Can't handle incomplete types nor sizes that are not
5939 fixed. */
5940 if (!COMPLETE_TYPE_P (type)
5941 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5942 return -1;
5943
5944 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5945 if (count == -1
5946 || !index
5947 || !TYPE_MAX_VALUE (index)
5948 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5949 || !TYPE_MIN_VALUE (index)
5950 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5951 || count < 0)
5952 return -1;
5953
5954 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5955 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5956
5957 /* There must be no padding. */
5958 if (wi::to_wide (TYPE_SIZE (type))
5959 != count * GET_MODE_BITSIZE (*modep))
5960 return -1;
5961
5962 return count;
5963 }
5964
5965 case RECORD_TYPE:
5966 {
5967 int count = 0;
5968 int sub_count;
5969 tree field;
5970
5971 /* Can't handle incomplete types nor sizes that are not
5972 fixed. */
5973 if (!COMPLETE_TYPE_P (type)
5974 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5975 return -1;
5976
5977 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5978 {
5979 if (TREE_CODE (field) != FIELD_DECL)
5980 continue;
5981
5982 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5983 if (sub_count < 0)
5984 return -1;
5985 count += sub_count;
5986 }
5987
5988 /* There must be no padding. */
5989 if (wi::to_wide (TYPE_SIZE (type))
5990 != count * GET_MODE_BITSIZE (*modep))
5991 return -1;
5992
5993 return count;
5994 }
5995
5996 case UNION_TYPE:
5997 case QUAL_UNION_TYPE:
5998 {
5999 /* These aren't very interesting except in a degenerate case. */
6000 int count = 0;
6001 int sub_count;
6002 tree field;
6003
6004 /* Can't handle incomplete types nor sizes that are not
6005 fixed. */
6006 if (!COMPLETE_TYPE_P (type)
6007 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6008 return -1;
6009
6010 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6011 {
6012 if (TREE_CODE (field) != FIELD_DECL)
6013 continue;
6014
6015 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6016 if (sub_count < 0)
6017 return -1;
6018 count = count > sub_count ? count : sub_count;
6019 }
6020
6021 /* There must be no padding. */
6022 if (wi::to_wide (TYPE_SIZE (type))
6023 != count * GET_MODE_BITSIZE (*modep))
6024 return -1;
6025
6026 return count;
6027 }
6028
6029 default:
6030 break;
6031 }
6032
6033 return -1;
6034 }
6035
6036 /* Return true if PCS_VARIANT should use VFP registers. */
6037 static bool
6038 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6039 {
6040 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6041 {
6042 static bool seen_thumb1_vfp = false;
6043
6044 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6045 {
6046 sorry ("Thumb-1 hard-float VFP ABI");
6047 /* sorry() is not immediately fatal, so only display this once. */
6048 seen_thumb1_vfp = true;
6049 }
6050
6051 return true;
6052 }
6053
6054 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6055 return false;
6056
6057 return (TARGET_32BIT && TARGET_HARD_FLOAT
6058 && (TARGET_VFP_DOUBLE || !is_double));
6059 }
6060
6061 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6062 suitable for passing or returning in VFP registers for the PCS
6063 variant selected. If it is, then *BASE_MODE is updated to contain
6064 a machine mode describing each element of the argument's type and
6065 *COUNT to hold the number of such elements. */
6066 static bool
6067 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6068 machine_mode mode, const_tree type,
6069 machine_mode *base_mode, int *count)
6070 {
6071 machine_mode new_mode = VOIDmode;
6072
6073 /* If we have the type information, prefer that to working things
6074 out from the mode. */
6075 if (type)
6076 {
6077 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6078
6079 if (ag_count > 0 && ag_count <= 4)
6080 *count = ag_count;
6081 else
6082 return false;
6083 }
6084 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6085 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6086 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6087 {
6088 *count = 1;
6089 new_mode = mode;
6090 }
6091 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6092 {
6093 *count = 2;
6094 new_mode = (mode == DCmode ? DFmode : SFmode);
6095 }
6096 else
6097 return false;
6098
6099
6100 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6101 return false;
6102
6103 *base_mode = new_mode;
6104 return true;
6105 }
6106
6107 static bool
6108 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6109 machine_mode mode, const_tree type)
6110 {
6111 int count ATTRIBUTE_UNUSED;
6112 machine_mode ag_mode ATTRIBUTE_UNUSED;
6113
6114 if (!use_vfp_abi (pcs_variant, false))
6115 return false;
6116 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6117 &ag_mode, &count);
6118 }
6119
6120 static bool
6121 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6122 const_tree type)
6123 {
6124 if (!use_vfp_abi (pcum->pcs_variant, false))
6125 return false;
6126
6127 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6128 &pcum->aapcs_vfp_rmode,
6129 &pcum->aapcs_vfp_rcount);
6130 }
6131
6132 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6133 for the behaviour of this function. */
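/* Editorial sketch of the allocation below: for a candidate recorded as
   two DFmode elements (e.g. a struct of two doubles), SHIFT is 2 and
   MASK is 0xf, so the loop scans s0, s2, s4, ... for four consecutive
   free single-precision registers, i.e. two consecutive
   double-precision registers such as d0/d1 (s0-s3).  */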
6134
6135 static bool
6136 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6137 const_tree type ATTRIBUTE_UNUSED)
6138 {
6139 int rmode_size
6140 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6141 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6142 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6143 int regno;
6144
6145 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6146 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6147 {
6148 pcum->aapcs_vfp_reg_alloc = mask << regno;
6149 if (mode == BLKmode
6150 || (mode == TImode && ! TARGET_NEON)
6151 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6152 {
6153 int i;
6154 int rcount = pcum->aapcs_vfp_rcount;
6155 int rshift = shift;
6156 machine_mode rmode = pcum->aapcs_vfp_rmode;
6157 rtx par;
6158 if (!TARGET_NEON)
6159 {
6160 /* Avoid using unsupported vector modes. */
6161 if (rmode == V2SImode)
6162 rmode = DImode;
6163 else if (rmode == V4SImode)
6164 {
6165 rmode = DImode;
6166 rcount *= 2;
6167 rshift /= 2;
6168 }
6169 }
6170 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6171 for (i = 0; i < rcount; i++)
6172 {
6173 rtx tmp = gen_rtx_REG (rmode,
6174 FIRST_VFP_REGNUM + regno + i * rshift);
6175 tmp = gen_rtx_EXPR_LIST
6176 (VOIDmode, tmp,
6177 GEN_INT (i * GET_MODE_SIZE (rmode)));
6178 XVECEXP (par, 0, i) = tmp;
6179 }
6180
6181 pcum->aapcs_reg = par;
6182 }
6183 else
6184 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6185 return true;
6186 }
6187 return false;
6188 }
6189
6190 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6191 comment there for the behaviour of this function. */
6192
6193 static rtx
6194 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6195 machine_mode mode,
6196 const_tree type ATTRIBUTE_UNUSED)
6197 {
6198 if (!use_vfp_abi (pcs_variant, false))
6199 return NULL;
6200
6201 if (mode == BLKmode
6202 || (GET_MODE_CLASS (mode) == MODE_INT
6203 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6204 && !TARGET_NEON))
6205 {
6206 int count;
6207 machine_mode ag_mode;
6208 int i;
6209 rtx par;
6210 int shift;
6211
6212 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6213 &ag_mode, &count);
6214
6215 if (!TARGET_NEON)
6216 {
6217 if (ag_mode == V2SImode)
6218 ag_mode = DImode;
6219 else if (ag_mode == V4SImode)
6220 {
6221 ag_mode = DImode;
6222 count *= 2;
6223 }
6224 }
6225 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6226 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6227 for (i = 0; i < count; i++)
6228 {
6229 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6230 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6231 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6232 XVECEXP (par, 0, i) = tmp;
6233 }
6234
6235 return par;
6236 }
6237
6238 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6239 }
6240
6241 static void
6242 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6243 machine_mode mode ATTRIBUTE_UNUSED,
6244 const_tree type ATTRIBUTE_UNUSED)
6245 {
6246 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6247 pcum->aapcs_vfp_reg_alloc = 0;
6248 return;
6249 }
6250
6251 #define AAPCS_CP(X) \
6252 { \
6253 aapcs_ ## X ## _cum_init, \
6254 aapcs_ ## X ## _is_call_candidate, \
6255 aapcs_ ## X ## _allocate, \
6256 aapcs_ ## X ## _is_return_candidate, \
6257 aapcs_ ## X ## _allocate_return_reg, \
6258 aapcs_ ## X ## _advance \
6259 }
6260
6261 /* Table of co-processors that can be used to pass arguments in
6262 registers. Ideally no argument should be a candidate for more than
6263 one co-processor table entry, but the table is processed in order
6264 and stops after the first match. If that entry then fails to put
6265 the argument into a co-processor register, the argument will go on
6266 the stack. */
6267 static struct
6268 {
6269 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6270 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6271
6272 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6273 BLKmode) is a candidate for this co-processor's registers; this
6274 function should ignore any position-dependent state in
6275 CUMULATIVE_ARGS and only use call-type dependent information. */
6276 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6277
6278 /* Return true if the argument does get a co-processor register; it
6279 should set aapcs_reg to an RTX of the register allocated as is
6280 required for a return from FUNCTION_ARG. */
6281 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282
6283 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6284 be returned in this co-processor's registers. */
6285 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6286
6287 /* Allocate and return an RTX element to hold the return type of a call. This
6288 routine must not fail and will only be called if is_return_candidate
6289 returned true with the same parameters. */
6290 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6291
6292 /* Finish processing this argument and prepare to start processing
6293 the next one. */
6294 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6295 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6296 {
6297 AAPCS_CP(vfp)
6298 };
6299
6300 #undef AAPCS_CP
6301
6302 static int
6303 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6304 const_tree type)
6305 {
6306 int i;
6307
6308 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6309 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6310 return i;
6311
6312 return -1;
6313 }
6314
6315 static int
6316 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6317 {
6318 /* We aren't passed a decl, so we can't check that a call is local.
6319 However, it isn't clear that that would be a win anyway, since it
6320 might limit some tail-calling opportunities. */
6321 enum arm_pcs pcs_variant;
6322
6323 if (fntype)
6324 {
6325 const_tree fndecl = NULL_TREE;
6326
6327 if (TREE_CODE (fntype) == FUNCTION_DECL)
6328 {
6329 fndecl = fntype;
6330 fntype = TREE_TYPE (fntype);
6331 }
6332
6333 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6334 }
6335 else
6336 pcs_variant = arm_pcs_default;
6337
6338 if (pcs_variant != ARM_PCS_AAPCS)
6339 {
6340 int i;
6341
6342 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6343 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6344 TYPE_MODE (type),
6345 type))
6346 return i;
6347 }
6348 return -1;
6349 }
6350
6351 static rtx
6352 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6353 const_tree fntype)
6354 {
6355 /* We aren't passed a decl, so we can't check that a call is local.
6356 However, it isn't clear that that would be a win anyway, since it
6357 might limit some tail-calling opportunities. */
6358 enum arm_pcs pcs_variant;
6359 int unsignedp ATTRIBUTE_UNUSED;
6360
6361 if (fntype)
6362 {
6363 const_tree fndecl = NULL_TREE;
6364
6365 if (TREE_CODE (fntype) == FUNCTION_DECL)
6366 {
6367 fndecl = fntype;
6368 fntype = TREE_TYPE (fntype);
6369 }
6370
6371 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6372 }
6373 else
6374 pcs_variant = arm_pcs_default;
6375
6376 /* Promote integer types. */
6377 if (type && INTEGRAL_TYPE_P (type))
6378 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6379
6380 if (pcs_variant != ARM_PCS_AAPCS)
6381 {
6382 int i;
6383
6384 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6385 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6386 type))
6387 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6388 mode, type);
6389 }
6390
6391 /* Promote small structs returned in a register to full-word size
6392 for big-endian AAPCS. */
6393 if (type && arm_return_in_msb (type))
6394 {
6395 HOST_WIDE_INT size = int_size_in_bytes (type);
6396 if (size % UNITS_PER_WORD != 0)
6397 {
6398 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6399 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6400 }
6401 }
6402
6403 return gen_rtx_REG (mode, R0_REGNUM);
6404 }
6405
6406 static rtx
6407 aapcs_libcall_value (machine_mode mode)
6408 {
6409 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6410 && GET_MODE_SIZE (mode) <= 4)
6411 mode = SImode;
6412
6413 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6414 }
6415
6416 /* Lay out a function argument using the AAPCS rules. The rule
6417 numbers referred to here are those in the AAPCS. */
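/* Editorial illustration of the core-register rules below: with
   NCRN == 3 and splitting still allowed, a 16-byte struct of ints falls
   under rule C5: the first 4 bytes go in r3 (aapcs_partial == 4) and the
   remaining 12 bytes go on the stack.  A DImode argument arriving with
   NCRN == 1 is instead rounded up to r2 by rule C3 and then fits in
   r2/r3 under rule C4.  */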
6418 static void
6419 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6420 const_tree type, bool named)
6421 {
6422 int nregs, nregs2;
6423 int ncrn;
6424
6425 /* We only need to do this once per argument. */
6426 if (pcum->aapcs_arg_processed)
6427 return;
6428
6429 pcum->aapcs_arg_processed = true;
6430
6431 /* Special case: if named is false then we are handling an incoming
6432 anonymous argument which is on the stack. */
6433 if (!named)
6434 return;
6435
6436 /* Is this a potential co-processor register candidate? */
6437 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6438 {
6439 int slot = aapcs_select_call_coproc (pcum, mode, type);
6440 pcum->aapcs_cprc_slot = slot;
6441
6442 /* We don't have to apply any of the rules from part B of the
6443 preparation phase, these are handled elsewhere in the
6444 compiler. */
6445
6446 if (slot >= 0)
6447 {
6448 /* A Co-processor register candidate goes either in its own
6449 class of registers or on the stack. */
6450 if (!pcum->aapcs_cprc_failed[slot])
6451 {
6452 /* C1.cp - Try to allocate the argument to co-processor
6453 registers. */
6454 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6455 return;
6456
6457 /* C2.cp - Put the argument on the stack and note that we
6458 can't assign any more candidates in this slot. We also
6459 need to note that we have allocated stack space, so that
6460 we won't later try to split a non-cprc candidate between
6461 core registers and the stack. */
6462 pcum->aapcs_cprc_failed[slot] = true;
6463 pcum->can_split = false;
6464 }
6465
6466 /* We didn't get a register, so this argument goes on the
6467 stack. */
6468 gcc_assert (pcum->can_split == false);
6469 return;
6470 }
6471 }
6472
6473 /* C3 - For double-word aligned arguments, round the NCRN up to the
6474 next even number. */
6475 ncrn = pcum->aapcs_ncrn;
6476 if (ncrn & 1)
6477 {
6478 int res = arm_needs_doubleword_align (mode, type);
6479 /* Only warn during RTL expansion of call stmts, otherwise we would
6480 warn e.g. during gimplification even on functions that will always
6481 be inlined, and we'd warn multiple times. Don't warn when
6482 called in expand_function_start either, as we warn instead in
6483 arm_function_arg_boundary in that case. */
6484 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6485 inform (input_location, "parameter passing for argument of type "
6486 "%qT changed in GCC 7.1", type);
6487 else if (res > 0)
6488 ncrn++;
6489 }
6490
6491 nregs = ARM_NUM_REGS2 (mode, type);
6492
6493 /* Sigh, this test should really assert that nregs > 0, but a GCC
6494 extension allows empty structs and then gives them empty size; it
6495 then allows such a structure to be passed by value. For some of
6496 the code below we have to pretend that such an argument has
6497 non-zero size so that we 'locate' it correctly either in
6498 registers or on the stack. */
6499 gcc_assert (nregs >= 0);
6500
6501 nregs2 = nregs ? nregs : 1;
6502
6503 /* C4 - Argument fits entirely in core registers. */
6504 if (ncrn + nregs2 <= NUM_ARG_REGS)
6505 {
6506 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6507 pcum->aapcs_next_ncrn = ncrn + nregs;
6508 return;
6509 }
6510
6511 /* C5 - Some core registers left and there are no arguments already
6512 on the stack: split this argument between the remaining core
6513 registers and the stack. */
6514 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6515 {
6516 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6517 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6518 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6519 return;
6520 }
6521
6522 /* C6 - NCRN is set to 4. */
6523 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6524
6525 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6526 return;
6527 }
6528
6529 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6530 for a call to a function whose data type is FNTYPE.
6531 For a library call, FNTYPE is NULL. */
6532 void
6533 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6534 rtx libname,
6535 tree fndecl ATTRIBUTE_UNUSED)
6536 {
6537 /* Long call handling. */
6538 if (fntype)
6539 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6540 else
6541 pcum->pcs_variant = arm_pcs_default;
6542
6543 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6544 {
6545 if (arm_libcall_uses_aapcs_base (libname))
6546 pcum->pcs_variant = ARM_PCS_AAPCS;
6547
6548 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6549 pcum->aapcs_reg = NULL_RTX;
6550 pcum->aapcs_partial = 0;
6551 pcum->aapcs_arg_processed = false;
6552 pcum->aapcs_cprc_slot = -1;
6553 pcum->can_split = true;
6554
6555 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6556 {
6557 int i;
6558
6559 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6560 {
6561 pcum->aapcs_cprc_failed[i] = false;
6562 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6563 }
6564 }
6565 return;
6566 }
6567
6568 /* Legacy ABIs */
6569
6570 /* On the ARM, the offset starts at 0. */
6571 pcum->nregs = 0;
6572 pcum->iwmmxt_nregs = 0;
6573 pcum->can_split = true;
6574
6575 /* Varargs vectors are treated the same as long long.
6576 named_count avoids having to change the way arm handles 'named' */
6577 pcum->named_count = 0;
6578 pcum->nargs = 0;
6579
6580 if (TARGET_REALLY_IWMMXT && fntype)
6581 {
6582 tree fn_arg;
6583
6584 for (fn_arg = TYPE_ARG_TYPES (fntype);
6585 fn_arg;
6586 fn_arg = TREE_CHAIN (fn_arg))
6587 pcum->named_count += 1;
6588
6589 if (! pcum->named_count)
6590 pcum->named_count = INT_MAX;
6591 }
6592 }
6593
6594 /* Return 1 if double word alignment is required for argument passing.
6595 Return -1 if double word alignment used to be required for argument
6596 passing before PR77728 ABI fix, but is not required anymore.
6597 Return 0 if double word alignment is not required and wasn't required
6598 before either. */
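/* Editorial illustration: a struct containing a 64-bit member (e.g. a
   long long field) has a FIELD_DECL whose DECL_ALIGN exceeds
   PARM_BOUNDARY, so the function returns 1; a plain int argument
   returns 0.  */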
6599 static int
6600 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6601 {
6602 if (!type)
6603 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6604
6605 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6606 if (!AGGREGATE_TYPE_P (type))
6607 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6608
6609 /* Array types: Use member alignment of element type. */
6610 if (TREE_CODE (type) == ARRAY_TYPE)
6611 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6612
6613 int ret = 0;
6614 /* Record/aggregate types: Use greatest member alignment of any member. */
6615 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6616 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6617 {
6618 if (TREE_CODE (field) == FIELD_DECL)
6619 return 1;
6620 else
6621 /* Before PR77728 fix, we were incorrectly considering also
6622 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6623 Make sure we can warn about that with -Wpsabi. */
6624 ret = -1;
6625 }
6626
6627 return ret;
6628 }
6629
6630
6631 /* Determine where to put an argument to a function.
6632 Value is zero to push the argument on the stack,
6633 or a hard register in which to store the argument.
6634
6635 MODE is the argument's machine mode.
6636 TYPE is the data type of the argument (as a tree).
6637 This is null for libcalls where that information may
6638 not be available.
6639 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6640 the preceding args and about the function being called.
6641 NAMED is nonzero if this argument is a named parameter
6642 (otherwise it is an extra parameter matching an ellipsis).
6643
6644 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6645 other arguments are passed on the stack. If (NAMED == 0) (which happens
6646 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6647 defined), say it is passed in the stack (function_prologue will
6648 indeed make it pass in the stack if necessary). */
6649
6650 static rtx
6651 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6652 const_tree type, bool named)
6653 {
6654 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6655 int nregs;
6656
6657 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6658 a call insn (op3 of a call_value insn). */
6659 if (mode == VOIDmode)
6660 return const0_rtx;
6661
6662 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6663 {
6664 aapcs_layout_arg (pcum, mode, type, named);
6665 return pcum->aapcs_reg;
6666 }
6667
6668 /* Varargs vectors are treated the same as long long.
6669 named_count avoids having to change the way arm handles 'named' */
6670 if (TARGET_IWMMXT_ABI
6671 && arm_vector_mode_supported_p (mode)
6672 && pcum->named_count > pcum->nargs + 1)
6673 {
6674 if (pcum->iwmmxt_nregs <= 9)
6675 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6676 else
6677 {
6678 pcum->can_split = false;
6679 return NULL_RTX;
6680 }
6681 }
6682
6683 /* Put doubleword aligned quantities in even register pairs. */
6684 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6685 {
6686 int res = arm_needs_doubleword_align (mode, type);
6687 if (res < 0 && warn_psabi)
6688 inform (input_location, "parameter passing for argument of type "
6689 "%qT changed in GCC 7.1", type);
6690 else if (res > 0)
6691 pcum->nregs++;
6692 }
6693
6694 /* Only allow splitting an arg between regs and memory if all preceding
6695 args were allocated to regs. For args passed by reference we only count
6696 the reference pointer. */
6697 if (pcum->can_split)
6698 nregs = 1;
6699 else
6700 nregs = ARM_NUM_REGS2 (mode, type);
6701
6702 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6703 return NULL_RTX;
6704
6705 return gen_rtx_REG (mode, pcum->nregs);
6706 }
6707
6708 static unsigned int
6709 arm_function_arg_boundary (machine_mode mode, const_tree type)
6710 {
6711 if (!ARM_DOUBLEWORD_ALIGN)
6712 return PARM_BOUNDARY;
6713
6714 int res = arm_needs_doubleword_align (mode, type);
6715 if (res < 0 && warn_psabi)
6716 inform (input_location, "parameter passing for argument of type %qT "
6717 "changed in GCC 7.1", type);
6718
6719 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6720 }
6721
6722 static int
6723 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6724 tree type, bool named)
6725 {
6726 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6727 int nregs = pcum->nregs;
6728
6729 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6730 {
6731 aapcs_layout_arg (pcum, mode, type, named);
6732 return pcum->aapcs_partial;
6733 }
6734
6735 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6736 return 0;
6737
6738 if (NUM_ARG_REGS > nregs
6739 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6740 && pcum->can_split)
6741 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6742
6743 return 0;
6744 }
6745
6746 /* Update the data in PCUM to advance over an argument
6747 of mode MODE and data type TYPE.
6748 (TYPE is null for libcalls where that information may not be available.) */
6749
6750 static void
6751 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6752 const_tree type, bool named)
6753 {
6754 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6755
6756 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6757 {
6758 aapcs_layout_arg (pcum, mode, type, named);
6759
6760 if (pcum->aapcs_cprc_slot >= 0)
6761 {
6762 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6763 type);
6764 pcum->aapcs_cprc_slot = -1;
6765 }
6766
6767 /* Generic stuff. */
6768 pcum->aapcs_arg_processed = false;
6769 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6770 pcum->aapcs_reg = NULL_RTX;
6771 pcum->aapcs_partial = 0;
6772 }
6773 else
6774 {
6775 pcum->nargs += 1;
6776 if (arm_vector_mode_supported_p (mode)
6777 && pcum->named_count > pcum->nargs
6778 && TARGET_IWMMXT_ABI)
6779 pcum->iwmmxt_nregs += 1;
6780 else
6781 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6782 }
6783 }
6784
6785 /* Variable sized types are passed by reference. This is a GCC
6786 extension to the ARM ABI. */
6787
6788 static bool
6789 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6790 machine_mode mode ATTRIBUTE_UNUSED,
6791 const_tree type, bool named ATTRIBUTE_UNUSED)
6792 {
6793 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6794 }
6795 \f
6796 /* Encode the current state of the #pragma [no_]long_calls. */
6797 typedef enum
6798 {
6799 OFF, /* No #pragma [no_]long_calls is in effect. */
6800 LONG, /* #pragma long_calls is in effect. */
6801 SHORT /* #pragma no_long_calls is in effect. */
6802 } arm_pragma_enum;
6803
6804 static arm_pragma_enum arm_pragma_long_calls = OFF;
6805
6806 void
6807 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6808 {
6809 arm_pragma_long_calls = LONG;
6810 }
6811
6812 void
6813 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6814 {
6815 arm_pragma_long_calls = SHORT;
6816 }
6817
6818 void
6819 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6820 {
6821 arm_pragma_long_calls = OFF;
6822 }
6823 \f
6824 /* Handle an attribute requiring a FUNCTION_DECL;
6825 arguments as in struct attribute_spec.handler. */
6826 static tree
6827 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6828 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6829 {
6830 if (TREE_CODE (*node) != FUNCTION_DECL)
6831 {
6832 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6833 name);
6834 *no_add_attrs = true;
6835 }
6836
6837 return NULL_TREE;
6838 }
6839
6840 /* Handle an "interrupt" or "isr" attribute;
6841 arguments as in struct attribute_spec.handler. */
6842 static tree
6843 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6844 bool *no_add_attrs)
6845 {
6846 if (DECL_P (*node))
6847 {
6848 if (TREE_CODE (*node) != FUNCTION_DECL)
6849 {
6850 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6851 name);
6852 *no_add_attrs = true;
6853 }
6854 /* FIXME: the argument if any is checked for type attributes;
6855 should it be checked for decl ones? */
6856 }
6857 else
6858 {
6859 if (TREE_CODE (*node) == FUNCTION_TYPE
6860 || TREE_CODE (*node) == METHOD_TYPE)
6861 {
6862 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6863 {
6864 warning (OPT_Wattributes, "%qE attribute ignored",
6865 name);
6866 *no_add_attrs = true;
6867 }
6868 }
6869 else if (TREE_CODE (*node) == POINTER_TYPE
6870 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6871 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6872 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6873 {
6874 *node = build_variant_type_copy (*node);
6875 TREE_TYPE (*node) = build_type_attribute_variant
6876 (TREE_TYPE (*node),
6877 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6878 *no_add_attrs = true;
6879 }
6880 else
6881 {
6882 /* Possibly pass this attribute on from the type to a decl. */
6883 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6884 | (int) ATTR_FLAG_FUNCTION_NEXT
6885 | (int) ATTR_FLAG_ARRAY_NEXT))
6886 {
6887 *no_add_attrs = true;
6888 return tree_cons (name, args, NULL_TREE);
6889 }
6890 else
6891 {
6892 warning (OPT_Wattributes, "%qE attribute ignored",
6893 name);
6894 }
6895 }
6896 }
6897
6898 return NULL_TREE;
6899 }
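
/* Illustrative example (an assumption, not from the original sources):
   the "interrupt"/"isr" attribute handled above is normally written on a
   function declaration, optionally with a string naming the interrupt
   kind; a hypothetical handler:

     void my_handler (void) __attribute__ ((interrupt ("IRQ")));

   An unrecognized argument makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is ignored with a warning.  */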
6900
6901 /* Handle a "pcs" attribute; arguments as in struct
6902 attribute_spec.handler. */
6903 static tree
6904 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6905 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6906 {
6907 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6908 {
6909 warning (OPT_Wattributes, "%qE attribute ignored", name);
6910 *no_add_attrs = true;
6911 }
6912 return NULL_TREE;
6913 }
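
/* Illustrative example (an assumption, not from the original sources):
   the "pcs" attribute selects the procedure call standard for a function
   type, e.g. on a hypothetical declaration:

     double f2d (float) __attribute__ ((pcs ("aapcs")));

   Strings that arm_pcs_from_attribute does not recognize trigger the
   warning above.  */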
6914
6915 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6916 /* Handle the "notshared" attribute. This attribute is another way of
6917 requesting hidden visibility. ARM's compiler supports
6918 "__declspec(notshared)"; we support the same thing via an
6919 attribute. */
6920
6921 static tree
6922 arm_handle_notshared_attribute (tree *node,
6923 tree name ATTRIBUTE_UNUSED,
6924 tree args ATTRIBUTE_UNUSED,
6925 int flags ATTRIBUTE_UNUSED,
6926 bool *no_add_attrs)
6927 {
6928 tree decl = TYPE_NAME (*node);
6929
6930 if (decl)
6931 {
6932 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6933 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6934 *no_add_attrs = false;
6935 }
6936 return NULL_TREE;
6937 }
6938 #endif
6939
6940 /* This function returns true if a function with declaration FNDECL and type
6941 FNTYPE uses the stack to pass arguments or return values, and false
6942 otherwise. It is used for functions with the attribute
6943 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and issues diagnostic
6944 messages if the stack is used. NAME is the name of the attribute being
6945 checked. */
6946
6947 static bool
6948 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6949 {
6950 function_args_iterator args_iter;
6951 CUMULATIVE_ARGS args_so_far_v;
6952 cumulative_args_t args_so_far;
6953 bool first_param = true;
6954 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6955
6956 /* Error out if any argument is passed on the stack. */
6957 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6958 args_so_far = pack_cumulative_args (&args_so_far_v);
6959 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6960 {
6961 rtx arg_rtx;
6962 machine_mode arg_mode = TYPE_MODE (arg_type);
6963
6964 prev_arg_type = arg_type;
6965 if (VOID_TYPE_P (arg_type))
6966 continue;
6967
6968 if (!first_param)
6969 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6970 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6971 if (!arg_rtx
6972 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6973 {
6974 error ("%qE attribute not available to functions with arguments "
6975 "passed on the stack", name);
6976 return true;
6977 }
6978 first_param = false;
6979 }
6980
6981 /* Error out for variadic functions since we cannot control how many
6982 arguments will be passed and thus the stack could be used. stdarg_p () is
6983 not used for this check, to avoid walking the argument list twice. */
6984 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6985 {
6986 error ("%qE attribute not available to functions with variable number "
6987 "of arguments", name);
6988 return true;
6989 }
6990
6991 /* Error out if return value is passed on the stack. */
6992 ret_type = TREE_TYPE (fntype);
6993 if (arm_return_in_memory (ret_type, fntype))
6994 {
6995 error ("%qE attribute not available to functions that return value on "
6996 "the stack", name);
6997 return true;
6998 }
6999 return false;
7000 }
7001
7002 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7003 function will check whether the attribute is allowed here and will add the
7004 attribute to the function declaration tree or otherwise issue a warning. */
7005
7006 static tree
7007 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7008 tree /* args */,
7009 int /* flags */,
7010 bool *no_add_attrs)
7011 {
7012 tree fndecl;
7013
7014 if (!use_cmse)
7015 {
7016 *no_add_attrs = true;
7017 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7018 name);
7019 return NULL_TREE;
7020 }
7021
7022 /* Ignore attribute for function types. */
7023 if (TREE_CODE (*node) != FUNCTION_DECL)
7024 {
7025 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7026 name);
7027 *no_add_attrs = true;
7028 return NULL_TREE;
7029 }
7030
7031 fndecl = *node;
7032
7033 /* Warn for static linkage functions. */
7034 if (!TREE_PUBLIC (fndecl))
7035 {
7036 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7037 "with static linkage", name);
7038 *no_add_attrs = true;
7039 return NULL_TREE;
7040 }
7041
7042 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7043 TREE_TYPE (fndecl));
7044 return NULL_TREE;
7045 }
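
/* Illustrative example (an assumption, not from the original sources):
   with -mcmse, a secure entry point is declared roughly as below; the
   handler above drops the attribute if any argument or the return value
   would have to go on the stack.

     int __attribute__ ((cmse_nonsecure_entry)) entry_fn (int a, int b);
*/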
7046
7047
7048 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7049 function will check whether the attribute is allowed here and will add the
7050 attribute to the function type tree or otherwise issue a diagnostic. The
7051 reason we check this at declaration time is to only allow the use of the
7052 attribute with declarations of function pointers and not function
7053 declarations. This function checks NODE is of the expected type and issues
7054 diagnostics otherwise using NAME. If it is not of the expected type
7055 *NO_ADD_ATTRS will be set to true. */
7056
7057 static tree
7058 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7059 tree /* args */,
7060 int /* flags */,
7061 bool *no_add_attrs)
7062 {
7063 tree decl = NULL_TREE, fntype = NULL_TREE;
7064 tree type;
7065
7066 if (!use_cmse)
7067 {
7068 *no_add_attrs = true;
7069 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7070 name);
7071 return NULL_TREE;
7072 }
7073
7074 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7075 {
7076 decl = *node;
7077 fntype = TREE_TYPE (decl);
7078 }
7079
7080 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7081 fntype = TREE_TYPE (fntype);
7082
7083 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7084 {
7085 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7086 "function pointer", name);
7087 *no_add_attrs = true;
7088 return NULL_TREE;
7089 }
7090
7091 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7092
7093 if (*no_add_attrs)
7094 return NULL_TREE;
7095
7096 /* Prevent trees from being shared among function types with and without
7097 the cmse_nonsecure_call attribute. */
7098 type = TREE_TYPE (decl);
7099
7100 type = build_distinct_type_copy (type);
7101 TREE_TYPE (decl) = type;
7102 fntype = type;
7103
7104 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7105 {
7106 type = fntype;
7107 fntype = TREE_TYPE (fntype);
7108 fntype = build_distinct_type_copy (fntype);
7109 TREE_TYPE (type) = fntype;
7110 }
7111
7112 /* Construct a type attribute and add it to the function type. */
7113 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7114 TYPE_ATTRIBUTES (fntype));
7115 TYPE_ATTRIBUTES (fntype) = attrs;
7116 return NULL_TREE;
7117 }
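
/* Illustrative example (an assumption, not from the original sources):
   the attribute is accepted on declarations whose type is (a pointer to)
   a function type, e.g. a hypothetical non-secure callback:

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
*/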
7118
7119 /* Return 0 if the attributes for two types are incompatible, 1 if they
7120 are compatible, and 2 if they are nearly compatible (which causes a
7121 warning to be generated). */
7122 static int
7123 arm_comp_type_attributes (const_tree type1, const_tree type2)
7124 {
7125 int l1, l2, s1, s2;
7126
7127 /* Check for mismatch of non-default calling convention. */
7128 if (TREE_CODE (type1) != FUNCTION_TYPE)
7129 return 1;
7130
7131 /* Check for mismatched call attributes. */
7132 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7133 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7134 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7135 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7136
7137 /* Only bother to check if an attribute is defined. */
7138 if (l1 | l2 | s1 | s2)
7139 {
7140 /* If one type has an attribute, the other must have the same attribute. */
7141 if ((l1 != l2) || (s1 != s2))
7142 return 0;
7143
7144 /* Disallow mixed attributes. */
7145 if ((l1 & s2) || (l2 & s1))
7146 return 0;
7147 }
7148
7149 /* Check for mismatched ISR attribute. */
7150 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7151 if (! l1)
7152 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7153 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7154 if (! l2)
7155 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7156 if (l1 != l2)
7157 return 0;
7158
7159 l1 = lookup_attribute ("cmse_nonsecure_call",
7160 TYPE_ATTRIBUTES (type1)) != NULL;
7161 l2 = lookup_attribute ("cmse_nonsecure_call",
7162 TYPE_ATTRIBUTES (type2)) != NULL;
7163
7164 if (l1 != l2)
7165 return 0;
7166
7167 return 1;
7168 }
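
/* Illustrative example (an assumption, not from the original sources):
   two function types such as

     void (*a) (void) __attribute__ ((long_call));
     void (*b) (void) __attribute__ ((short_call));

   compare as incompatible (return value 0) because their call attributes
   differ.  */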
7169
7170 /* Assign default attributes to a newly defined type. This is used to
7171 set short_call/long_call attributes for function types of
7172 functions defined inside corresponding #pragma scopes. */
7173 static void
7174 arm_set_default_type_attributes (tree type)
7175 {
7176 /* Add __attribute__ ((long_call)) to all functions when inside
7177 #pragma long_calls, or __attribute__ ((short_call)) when inside
7178 #pragma no_long_calls. */
7179 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7180 {
7181 tree type_attr_list, attr_name;
7182 type_attr_list = TYPE_ATTRIBUTES (type);
7183
7184 if (arm_pragma_long_calls == LONG)
7185 attr_name = get_identifier ("long_call");
7186 else if (arm_pragma_long_calls == SHORT)
7187 attr_name = get_identifier ("short_call");
7188 else
7189 return;
7190
7191 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7192 TYPE_ATTRIBUTES (type) = type_attr_list;
7193 }
7194 }
7195 \f
7196 /* Return true if DECL is known to be linked into section SECTION. */
7197
7198 static bool
7199 arm_function_in_section_p (tree decl, section *section)
7200 {
7201 /* We can only be certain about the prevailing symbol definition. */
7202 if (!decl_binds_to_current_def_p (decl))
7203 return false;
7204
7205 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7206 if (!DECL_SECTION_NAME (decl))
7207 {
7208 /* Make sure that we will not create a unique section for DECL. */
7209 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7210 return false;
7211 }
7212
7213 return function_section (decl) == section;
7214 }
7215
7216 /* Return nonzero if a 32-bit "long_call" should be generated for
7217 a call from the current function to DECL. We generate a long_call
7218 if the function:
7219
7220 a. has an __attribute__ ((long_call))
7221 or b. is within the scope of a #pragma long_calls
7222 or c. the -mlong-calls command line switch has been specified
7223
7224 However we do not generate a long call if the function:
7225
7226 d. has an __attribute__ ((short_call))
7227 or e. is inside the scope of a #pragma no_long_calls
7228 or f. is defined in the same section as the current function. */
7229
7230 bool
7231 arm_is_long_call_p (tree decl)
7232 {
7233 tree attrs;
7234
7235 if (!decl)
7236 return TARGET_LONG_CALLS;
7237
7238 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7239 if (lookup_attribute ("short_call", attrs))
7240 return false;
7241
7242 /* For "f", be conservative, and only cater for cases in which the
7243 whole of the current function is placed in the same section. */
7244 if (!flag_reorder_blocks_and_partition
7245 && TREE_CODE (decl) == FUNCTION_DECL
7246 && arm_function_in_section_p (decl, current_function_section ()))
7247 return false;
7248
7249 if (lookup_attribute ("long_call", attrs))
7250 return true;
7251
7252 return TARGET_LONG_CALLS;
7253 }
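
/* Illustrative example (an assumption, not from the original sources):
   even with -mlong-calls, a declaration such as

     extern void lib_fn (void) __attribute__ ((short_call));

   is still called with a plain BL, because the "short_call" check above
   takes precedence over the command-line default.  */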
7254
7255 /* Return nonzero if it is ok to make a tail-call to DECL. */
7256 static bool
7257 arm_function_ok_for_sibcall (tree decl, tree exp)
7258 {
7259 unsigned long func_type;
7260
7261 if (cfun->machine->sibcall_blocked)
7262 return false;
7263
7264 /* Never tailcall something if we are generating code for Thumb-1. */
7265 if (TARGET_THUMB1)
7266 return false;
7267
7268 /* The PIC register is live on entry to VxWorks PLT entries, so we
7269 must make the call before restoring the PIC register. */
7270 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7271 return false;
7272
7273 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7274 may be used both as the target of the call and as the base register for
7275 restoring the VFP registers. */
7276 if (TARGET_APCS_FRAME && TARGET_ARM
7277 && TARGET_HARD_FLOAT
7278 && decl && arm_is_long_call_p (decl))
7279 return false;
7280
7281 /* If we are interworking and the function is not declared static
7282 then we can't tail-call it unless we know that it exists in this
7283 compilation unit (since it might be a Thumb routine). */
7284 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7285 && !TREE_ASM_WRITTEN (decl))
7286 return false;
7287
7288 func_type = arm_current_func_type ();
7289 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7290 if (IS_INTERRUPT (func_type))
7291 return false;
7292
7293 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7294 generated for entry functions themselves. */
7295 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7296 return false;
7297
7298 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7299 this would complicate matters for later code generation. */
7300 if (TREE_CODE (exp) == CALL_EXPR)
7301 {
7302 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7303 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7304 return false;
7305 }
7306
7307 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7308 {
7309 /* Check that the return value locations are the same. For
7310 example that we aren't returning a value from the sibling in
7311 a VFP register but then need to transfer it to a core
7312 register. */
7313 rtx a, b;
7314 tree decl_or_type = decl;
7315
7316 /* If it is an indirect function pointer, get the function type. */
7317 if (!decl)
7318 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7319
7320 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7321 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7322 cfun->decl, false);
7323 if (!rtx_equal_p (a, b))
7324 return false;
7325 }
7326
7327 /* Never tailcall if function may be called with a misaligned SP. */
7328 if (IS_STACKALIGN (func_type))
7329 return false;
7330
7331 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7332 references should become a NOP. Don't convert such calls into
7333 sibling calls. */
7334 if (TARGET_AAPCS_BASED
7335 && arm_abi == ARM_ABI_AAPCS
7336 && decl
7337 && DECL_WEAK (decl))
7338 return false;
7339
7340 /* We cannot do a tailcall for an indirect call by descriptor if all the
7341 argument registers are used because the only register left to load the
7342 address is IP and it will already contain the static chain. */
7343 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7344 {
7345 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7346 CUMULATIVE_ARGS cum;
7347 cumulative_args_t cum_v;
7348
7349 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7350 cum_v = pack_cumulative_args (&cum);
7351
7352 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7353 {
7354 tree type = TREE_VALUE (t);
7355 if (!VOID_TYPE_P (type))
7356 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7357 }
7358
7359 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7360 return false;
7361 }
7362
7363 /* Everything else is ok. */
7364 return true;
7365 }
7366
7367 \f
7368 /* Addressing mode support functions. */
7369
7370 /* Return nonzero if X is a legitimate immediate operand when compiling
7371 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7372 int
7373 legitimate_pic_operand_p (rtx x)
7374 {
7375 if (GET_CODE (x) == SYMBOL_REF
7376 || (GET_CODE (x) == CONST
7377 && GET_CODE (XEXP (x, 0)) == PLUS
7378 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7379 return 0;
7380
7381 return 1;
7382 }
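
/* Descriptive note (an editor's addition): for instance, (const_int 42)
   is a legitimate PIC immediate, whereas (symbol_ref "x") or
   (const (plus (symbol_ref "x") (const_int 4))) is not and must instead
   go through legitimize_pic_address below.  */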
7383
7384 /* Record that the current function needs a PIC register. Initialize
7385 cfun->machine->pic_reg if we have not already done so. */
7386
7387 static void
7388 require_pic_register (void)
7389 {
7390 /* A lot of the logic here is made obscure by the fact that this
7391 routine gets called as part of the rtx cost estimation process.
7392 We don't want those calls to affect any assumptions about the real
7393 function; and further, we can't call entry_of_function() until we
7394 start the real expansion process. */
7395 if (!crtl->uses_pic_offset_table)
7396 {
7397 gcc_assert (can_create_pseudo_p ());
7398 if (arm_pic_register != INVALID_REGNUM
7399 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7400 {
7401 if (!cfun->machine->pic_reg)
7402 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7403
7404 /* Play games to avoid marking the function as needing pic
7405 if we are being called as part of the cost-estimation
7406 process. */
7407 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7408 crtl->uses_pic_offset_table = 1;
7409 }
7410 else
7411 {
7412 rtx_insn *seq, *insn;
7413
7414 if (!cfun->machine->pic_reg)
7415 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7416
7417 /* Play games to avoid marking the function as needing pic
7418 if we are being called as part of the cost-estimation
7419 process. */
7420 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7421 {
7422 crtl->uses_pic_offset_table = 1;
7423 start_sequence ();
7424
7425 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7426 && arm_pic_register > LAST_LO_REGNUM)
7427 emit_move_insn (cfun->machine->pic_reg,
7428 gen_rtx_REG (Pmode, arm_pic_register));
7429 else
7430 arm_load_pic_register (0UL);
7431
7432 seq = get_insns ();
7433 end_sequence ();
7434
7435 for (insn = seq; insn; insn = NEXT_INSN (insn))
7436 if (INSN_P (insn))
7437 INSN_LOCATION (insn) = prologue_location;
7438
7439 /* We can be called during expansion of PHI nodes, where
7440 we can't yet emit instructions directly in the final
7441 insn stream. Queue the insns on the entry edge; they will
7442 be committed after everything else is expanded. */
7443 insert_insn_on_edge (seq,
7444 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7445 }
7446 }
7447 }
7448 }
7449
7450 rtx
7451 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7452 {
7453 if (GET_CODE (orig) == SYMBOL_REF
7454 || GET_CODE (orig) == LABEL_REF)
7455 {
7456 if (reg == 0)
7457 {
7458 gcc_assert (can_create_pseudo_p ());
7459 reg = gen_reg_rtx (Pmode);
7460 }
7461
7462 /* VxWorks does not impose a fixed gap between segments; the run-time
7463 gap can be different from the object-file gap. We therefore can't
7464 use GOTOFF unless we are absolutely sure that the symbol is in the
7465 same segment as the GOT. Unfortunately, the flexibility of linker
7466 scripts means that we can't be sure of that in general, so assume
7467 that GOTOFF is never valid on VxWorks. */
7468 /* References to weak symbols cannot be resolved locally: they
7469 may be overridden by a non-weak definition at link time. */
7470 rtx_insn *insn;
7471 if ((GET_CODE (orig) == LABEL_REF
7472 || (GET_CODE (orig) == SYMBOL_REF
7473 && SYMBOL_REF_LOCAL_P (orig)
7474 && (SYMBOL_REF_DECL (orig)
7475 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7476 && NEED_GOT_RELOC
7477 && arm_pic_data_is_text_relative)
7478 insn = arm_pic_static_addr (orig, reg);
7479 else
7480 {
7481 rtx pat;
7482 rtx mem;
7483
7484 /* If this function doesn't have a pic register, create one now. */
7485 require_pic_register ();
7486
7487 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7488
7489 /* Make the MEM as close to a constant as possible. */
7490 mem = SET_SRC (pat);
7491 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7492 MEM_READONLY_P (mem) = 1;
7493 MEM_NOTRAP_P (mem) = 1;
7494
7495 insn = emit_insn (pat);
7496 }
7497
7498 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7499 by the loop optimizer. */
7500 set_unique_reg_note (insn, REG_EQUAL, orig);
7501
7502 return reg;
7503 }
7504 else if (GET_CODE (orig) == CONST)
7505 {
7506 rtx base, offset;
7507
7508 if (GET_CODE (XEXP (orig, 0)) == PLUS
7509 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7510 return orig;
7511
7512 /* Handle the case where we have: const (UNSPEC_TLS). */
7513 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7514 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7515 return orig;
7516
7517 /* Handle the case where we have:
7518 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7519 CONST_INT. */
7520 if (GET_CODE (XEXP (orig, 0)) == PLUS
7521 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7522 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7523 {
7524 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7525 return orig;
7526 }
7527
7528 if (reg == 0)
7529 {
7530 gcc_assert (can_create_pseudo_p ());
7531 reg = gen_reg_rtx (Pmode);
7532 }
7533
7534 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7535
7536 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7537 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7538 base == reg ? 0 : reg);
7539
7540 if (CONST_INT_P (offset))
7541 {
7542 /* The base register doesn't really matter; we only want to
7543 test the index for the appropriate mode. */
7544 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7545 {
7546 gcc_assert (can_create_pseudo_p ());
7547 offset = force_reg (Pmode, offset);
7548 }
7549
7550 if (CONST_INT_P (offset))
7551 return plus_constant (Pmode, base, INTVAL (offset));
7552 }
7553
7554 if (GET_MODE_SIZE (mode) > 4
7555 && (GET_MODE_CLASS (mode) == MODE_INT
7556 || TARGET_SOFT_FLOAT))
7557 {
7558 emit_insn (gen_addsi3 (reg, base, offset));
7559 return reg;
7560 }
7561
7562 return gen_rtx_PLUS (Pmode, base, offset);
7563 }
7564
7565 return orig;
7566 }
7567
7568
7569 /* Find a spare register to use during the prolog of a function. */
7570
7571 static int
7572 thumb_find_work_register (unsigned long pushed_regs_mask)
7573 {
7574 int reg;
7575
7576 /* Check the argument registers first as these are call-used. The
7577 register allocation order means that sometimes r3 might be used
7578 but earlier argument registers might not, so check them all. */
7579 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7580 if (!df_regs_ever_live_p (reg))
7581 return reg;
7582
7583 /* Before going on to check the call-saved registers we can try a couple
7584 more ways of deducing that r3 is available. The first is when we are
7585 pushing anonymous arguments onto the stack and we have less than 4
7586 registers' worth of fixed arguments (*). In this case r3 will be part of
7587 the variable argument list and so we can be sure that it will be
7588 pushed right at the start of the function. Hence it will be available
7589 for the rest of the prologue.
7590 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7591 if (cfun->machine->uses_anonymous_args
7592 && crtl->args.pretend_args_size > 0)
7593 return LAST_ARG_REGNUM;
7594
7595 /* The other case is when we have fixed arguments but fewer than 4 registers'
7596 worth. In this case r3 might be used in the body of the function, but
7597 it is not being used to convey an argument into the function. In theory
7598 we could just check crtl->args.size to see how many bytes are
7599 being passed in argument registers, but it seems that it is unreliable.
7600 Sometimes it will have the value 0 when in fact arguments are being
7601 passed. (See testcase execute/20021111-1.c for an example). So we also
7602 check the args_info.nregs field. The problem with this field is
7603 that it makes no allowances for arguments that are passed to the
7604 function but which are not used. Hence we could miss an opportunity
7605 when a function has an unused argument in r3. But it is better to be
7606 safe than sorry. */
7607 if (! cfun->machine->uses_anonymous_args
7608 && crtl->args.size >= 0
7609 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7610 && (TARGET_AAPCS_BASED
7611 ? crtl->args.info.aapcs_ncrn < 4
7612 : crtl->args.info.nregs < 4))
7613 return LAST_ARG_REGNUM;
7614
7615 /* Otherwise look for a call-saved register that is going to be pushed. */
7616 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7617 if (pushed_regs_mask & (1 << reg))
7618 return reg;
7619
7620 if (TARGET_THUMB2)
7621 {
7622 /* Thumb-2 can use high regs. */
7623 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7624 if (pushed_regs_mask & (1 << reg))
7625 return reg;
7626 }
7627 /* Something went wrong - thumb_compute_save_reg_mask()
7628 should have arranged for a suitable register to be pushed. */
7629 gcc_unreachable ();
7630 }
7631
7632 static GTY(()) int pic_labelno;
7633
7634 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7635 low register. */
7636
7637 void
7638 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7639 {
7640 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7641
7642 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7643 return;
7644
7645 gcc_assert (flag_pic);
7646
7647 pic_reg = cfun->machine->pic_reg;
7648 if (TARGET_VXWORKS_RTP)
7649 {
7650 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7651 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7652 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7653
7654 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7655
7656 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7657 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7658 }
7659 else
7660 {
7661 /* We use an UNSPEC rather than a LABEL_REF because this label
7662 never appears in the code stream. */
7663
7664 labelno = GEN_INT (pic_labelno++);
7665 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7666 l1 = gen_rtx_CONST (VOIDmode, l1);
7667
7668 /* On the ARM the PC register contains 'dot + 8' at the time of the
7669 addition, on the Thumb it is 'dot + 4'. */
7670 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7671 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7672 UNSPEC_GOTSYM_OFF);
7673 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7674
7675 if (TARGET_32BIT)
7676 {
7677 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7678 }
7679 else /* TARGET_THUMB1 */
7680 {
7681 if (arm_pic_register != INVALID_REGNUM
7682 && REGNO (pic_reg) > LAST_LO_REGNUM)
7683 {
7684 /* We will have pushed the pic register, so we should always be
7685 able to find a work register. */
7686 pic_tmp = gen_rtx_REG (SImode,
7687 thumb_find_work_register (saved_regs));
7688 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7689 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7690 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7691 }
7692 else if (arm_pic_register != INVALID_REGNUM
7693 && arm_pic_register > LAST_LO_REGNUM
7694 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7695 {
7696 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7697 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7698 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7699 }
7700 else
7701 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7702 }
7703 }
7704
7705 /* Need to emit this whether or not we obey regdecls,
7706 since setjmp/longjmp can cause life info to screw up. */
7707 emit_use (pic_reg);
7708 }
7709
7710 /* Generate code to load the address of a static var when flag_pic is set. */
7711 static rtx_insn *
7712 arm_pic_static_addr (rtx orig, rtx reg)
7713 {
7714 rtx l1, labelno, offset_rtx;
7715
7716 gcc_assert (flag_pic);
7717
7718 /* We use an UNSPEC rather than a LABEL_REF because this label
7719 never appears in the code stream. */
7720 labelno = GEN_INT (pic_labelno++);
7721 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7722 l1 = gen_rtx_CONST (VOIDmode, l1);
7723
7724 /* On the ARM the PC register contains 'dot + 8' at the time of the
7725 addition, on the Thumb it is 'dot + 4'. */
7726 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7727 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7728 UNSPEC_SYMBOL_OFFSET);
7729 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7730
7731 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7732 }
7733
7734 /* Return nonzero if X is valid as an ARM state addressing register. */
7735 static int
7736 arm_address_register_rtx_p (rtx x, int strict_p)
7737 {
7738 int regno;
7739
7740 if (!REG_P (x))
7741 return 0;
7742
7743 regno = REGNO (x);
7744
7745 if (strict_p)
7746 return ARM_REGNO_OK_FOR_BASE_P (regno);
7747
7748 return (regno <= LAST_ARM_REGNUM
7749 || regno >= FIRST_PSEUDO_REGISTER
7750 || regno == FRAME_POINTER_REGNUM
7751 || regno == ARG_POINTER_REGNUM);
7752 }
7753
7754 /* Return TRUE if this rtx is the difference of a symbol and a label,
7755 and will reduce to a PC-relative relocation in the object file.
7756 Expressions like this can be left alone when generating PIC, rather
7757 than forced through the GOT. */
7758 static int
7759 pcrel_constant_p (rtx x)
7760 {
7761 if (GET_CODE (x) == MINUS)
7762 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7763
7764 return FALSE;
7765 }
7766
7767 /* Return true if X will surely end up in an index register after next
7768 splitting pass. */
7769 static bool
7770 will_be_in_index_register (const_rtx x)
7771 {
7772 /* arm.md: calculate_pic_address will split this into a register. */
7773 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7774 }
7775
7776 /* Return nonzero if X is a valid ARM state address operand. */
7777 int
7778 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7779 int strict_p)
7780 {
7781 bool use_ldrd;
7782 enum rtx_code code = GET_CODE (x);
7783
7784 if (arm_address_register_rtx_p (x, strict_p))
7785 return 1;
7786
7787 use_ldrd = (TARGET_LDRD
7788 && (mode == DImode || mode == DFmode));
7789
7790 if (code == POST_INC || code == PRE_DEC
7791 || ((code == PRE_INC || code == POST_DEC)
7792 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7793 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7794
7795 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7796 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7797 && GET_CODE (XEXP (x, 1)) == PLUS
7798 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7799 {
7800 rtx addend = XEXP (XEXP (x, 1), 1);
7801
7802 /* Don't allow ldrd post increment by register because it's hard
7803 to fixup invalid register choices. */
7804 if (use_ldrd
7805 && GET_CODE (x) == POST_MODIFY
7806 && REG_P (addend))
7807 return 0;
7808
7809 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7810 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7811 }
7812
7813 /* After reload constants split into minipools will have addresses
7814 from a LABEL_REF. */
7815 else if (reload_completed
7816 && (code == LABEL_REF
7817 || (code == CONST
7818 && GET_CODE (XEXP (x, 0)) == PLUS
7819 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7820 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7821 return 1;
7822
7823 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7824 return 0;
7825
7826 else if (code == PLUS)
7827 {
7828 rtx xop0 = XEXP (x, 0);
7829 rtx xop1 = XEXP (x, 1);
7830
7831 return ((arm_address_register_rtx_p (xop0, strict_p)
7832 && ((CONST_INT_P (xop1)
7833 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7834 || (!strict_p && will_be_in_index_register (xop1))))
7835 || (arm_address_register_rtx_p (xop1, strict_p)
7836 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7837 }
7838
7839 #if 0
7840 /* Reload currently can't handle MINUS, so disable this for now */
7841 else if (GET_CODE (x) == MINUS)
7842 {
7843 rtx xop0 = XEXP (x, 0);
7844 rtx xop1 = XEXP (x, 1);
7845
7846 return (arm_address_register_rtx_p (xop0, strict_p)
7847 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7848 }
7849 #endif
7850
7851 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7852 && code == SYMBOL_REF
7853 && CONSTANT_POOL_ADDRESS_P (x)
7854 && ! (flag_pic
7855 && symbol_mentioned_p (get_pool_constant (x))
7856 && ! pcrel_constant_p (get_pool_constant (x))))
7857 return 1;
7858
7859 return 0;
7860 }
7861
7862 /* Return true if we can avoid creating a constant pool entry for x. */
7863 static bool
7864 can_avoid_literal_pool_for_label_p (rtx x)
7865 {
7866 /* Normally we can assign constant values to target registers without
7867 the help of the constant pool. But there are cases where we have to use
7868 the constant pool, for example when we:
7869 1) assign a label to a register;
7870 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
7871
7872 A constant pool access of the form:
7873 (set (reg r0) (mem (symbol_ref (".LC0"))))
7874 will cause the use of the literal pool (later, in function arm_reorg).
7875 So here we mark such a form as invalid, and the compiler
7876 will adjust it into:
7877 (set (reg r0) (symbol_ref (".LC0")))
7878 (set (reg r0) (mem (reg r0))).
7879 No extra register is required, and (mem (reg r0)) won't cause the use
7880 of literal pools. */
7881 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7882 && CONSTANT_POOL_ADDRESS_P (x))
7883 return 1;
7884 return 0;
7885 }
7886
7887
7888 /* Return nonzero if X is a valid Thumb-2 address operand. */
7889 static int
7890 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7891 {
7892 bool use_ldrd;
7893 enum rtx_code code = GET_CODE (x);
7894
7895 if (arm_address_register_rtx_p (x, strict_p))
7896 return 1;
7897
7898 use_ldrd = (TARGET_LDRD
7899 && (mode == DImode || mode == DFmode));
7900
7901 if (code == POST_INC || code == PRE_DEC
7902 || ((code == PRE_INC || code == POST_DEC)
7903 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7904 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7905
7906 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7907 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7908 && GET_CODE (XEXP (x, 1)) == PLUS
7909 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7910 {
7911 /* Thumb-2 only has autoincrement by constant. */
7912 rtx addend = XEXP (XEXP (x, 1), 1);
7913 HOST_WIDE_INT offset;
7914
7915 if (!CONST_INT_P (addend))
7916 return 0;
7917
7918 offset = INTVAL(addend);
7919 if (GET_MODE_SIZE (mode) <= 4)
7920 return (offset > -256 && offset < 256);
7921
7922 return (use_ldrd && offset > -1024 && offset < 1024
7923 && (offset & 3) == 0);
7924 }
7925
7926 /* After reload constants split into minipools will have addresses
7927 from a LABEL_REF. */
7928 else if (reload_completed
7929 && (code == LABEL_REF
7930 || (code == CONST
7931 && GET_CODE (XEXP (x, 0)) == PLUS
7932 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7933 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7934 return 1;
7935
7936 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7937 return 0;
7938
7939 else if (code == PLUS)
7940 {
7941 rtx xop0 = XEXP (x, 0);
7942 rtx xop1 = XEXP (x, 1);
7943
7944 return ((arm_address_register_rtx_p (xop0, strict_p)
7945 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7946 || (!strict_p && will_be_in_index_register (xop1))))
7947 || (arm_address_register_rtx_p (xop1, strict_p)
7948 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7949 }
7950
7951 else if (can_avoid_literal_pool_for_label_p (x))
7952 return 0;
7953
7954 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7955 && code == SYMBOL_REF
7956 && CONSTANT_POOL_ADDRESS_P (x)
7957 && ! (flag_pic
7958 && symbol_mentioned_p (get_pool_constant (x))
7959 && ! pcrel_constant_p (get_pool_constant (x))))
7960 return 1;
7961
7962 return 0;
7963 }
7964
7965 /* Return nonzero if INDEX is valid for an address index operand in
7966 ARM state. */
7967 static int
7968 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7969 int strict_p)
7970 {
7971 HOST_WIDE_INT range;
7972 enum rtx_code code = GET_CODE (index);
7973
7974 /* Standard coprocessor addressing modes. */
7975 if (TARGET_HARD_FLOAT
7976 && (mode == SFmode || mode == DFmode))
7977 return (code == CONST_INT && INTVAL (index) < 1024
7978 && INTVAL (index) > -1024
7979 && (INTVAL (index) & 3) == 0);
7980
7981 /* For quad modes, we restrict the constant offset to be slightly less
7982 than what the instruction format permits. We do this because for
7983 quad mode moves, we will actually decompose them into two separate
7984 double-mode reads or writes. INDEX must therefore be a valid
7985 (double-mode) offset and so should INDEX+8. */
7986 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7987 return (code == CONST_INT
7988 && INTVAL (index) < 1016
7989 && INTVAL (index) > -1024
7990 && (INTVAL (index) & 3) == 0);
7991
7992 /* We have no such constraint on double mode offsets, so we permit the
7993 full range of the instruction format. */
7994 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7995 return (code == CONST_INT
7996 && INTVAL (index) < 1024
7997 && INTVAL (index) > -1024
7998 && (INTVAL (index) & 3) == 0);
7999
8000 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8001 return (code == CONST_INT
8002 && INTVAL (index) < 1024
8003 && INTVAL (index) > -1024
8004 && (INTVAL (index) & 3) == 0);
8005
8006 if (arm_address_register_rtx_p (index, strict_p)
8007 && (GET_MODE_SIZE (mode) <= 4))
8008 return 1;
8009
8010 if (mode == DImode || mode == DFmode)
8011 {
8012 if (code == CONST_INT)
8013 {
8014 HOST_WIDE_INT val = INTVAL (index);
8015
8016 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8017 If vldr is selected it uses arm_coproc_mem_operand. */
8018 if (TARGET_LDRD)
8019 return val > -256 && val < 256;
8020 else
8021 return val > -4096 && val < 4092;
8022 }
8023
8024 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8025 }
8026
8027 if (GET_MODE_SIZE (mode) <= 4
8028 && ! (arm_arch4
8029 && (mode == HImode
8030 || mode == HFmode
8031 || (mode == QImode && outer == SIGN_EXTEND))))
8032 {
8033 if (code == MULT)
8034 {
8035 rtx xiop0 = XEXP (index, 0);
8036 rtx xiop1 = XEXP (index, 1);
8037
8038 return ((arm_address_register_rtx_p (xiop0, strict_p)
8039 && power_of_two_operand (xiop1, SImode))
8040 || (arm_address_register_rtx_p (xiop1, strict_p)
8041 && power_of_two_operand (xiop0, SImode)));
8042 }
8043 else if (code == LSHIFTRT || code == ASHIFTRT
8044 || code == ASHIFT || code == ROTATERT)
8045 {
8046 rtx op = XEXP (index, 1);
8047
8048 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8049 && CONST_INT_P (op)
8050 && INTVAL (op) > 0
8051 && INTVAL (op) <= 31);
8052 }
8053 }
8054
8055 /* For ARM v4 we may be doing a sign-extend operation during the
8056 load. */
8057 if (arm_arch4)
8058 {
8059 if (mode == HImode
8060 || mode == HFmode
8061 || (outer == SIGN_EXTEND && mode == QImode))
8062 range = 256;
8063 else
8064 range = 4096;
8065 }
8066 else
8067 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8068
8069 return (code == CONST_INT
8070 && INTVAL (index) < range
8071 && INTVAL (index) > -range);
8072 }
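
/* Descriptive note (an editor's addition): for SImode the checks above
   accept, for example, [rN, #4095], [rN, #-4095], [rN, rM] and scaled
   forms such as [rN, rM, lsl #2], while DImode/DFmode with LDRD is
   limited to [rN, #imm] with -255 <= imm <= 255.  */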
8073
8074 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8075 index operand, i.e. 1, 2, 4 or 8. */
8076 static bool
8077 thumb2_index_mul_operand (rtx op)
8078 {
8079 HOST_WIDE_INT val;
8080
8081 if (!CONST_INT_P (op))
8082 return false;
8083
8084 val = INTVAL(op);
8085 return (val == 1 || val == 2 || val == 4 || val == 8);
8086 }
8087
8088 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8089 static int
8090 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8091 {
8092 enum rtx_code code = GET_CODE (index);
8093
8094 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8095 /* Standard coprocessor addressing modes. */
8096 if (TARGET_HARD_FLOAT
8097 && (mode == SFmode || mode == DFmode))
8098 return (code == CONST_INT && INTVAL (index) < 1024
8099 /* Thumb-2 allows only > -256 index range for its core register
8100 load/stores. Since we allow SF/DF in core registers, we have
8101 to use the intersection between -256~4096 (core) and -1024~1024
8102 (coprocessor). */
8103 && INTVAL (index) > -256
8104 && (INTVAL (index) & 3) == 0);
8105
8106 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8107 {
8108 /* For DImode assume values will usually live in core regs
8109 and only allow LDRD addressing modes. */
8110 if (!TARGET_LDRD || mode != DImode)
8111 return (code == CONST_INT
8112 && INTVAL (index) < 1024
8113 && INTVAL (index) > -1024
8114 && (INTVAL (index) & 3) == 0);
8115 }
8116
8117 /* For quad modes, we restrict the constant offset to be slightly less
8118 than what the instruction format permits. We do this because for
8119 quad mode moves, we will actually decompose them into two separate
8120 double-mode reads or writes. INDEX must therefore be a valid
8121 (double-mode) offset and so should INDEX+8. */
8122 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8123 return (code == CONST_INT
8124 && INTVAL (index) < 1016
8125 && INTVAL (index) > -1024
8126 && (INTVAL (index) & 3) == 0);
8127
8128 /* We have no such constraint on double mode offsets, so we permit the
8129 full range of the instruction format. */
8130 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8131 return (code == CONST_INT
8132 && INTVAL (index) < 1024
8133 && INTVAL (index) > -1024
8134 && (INTVAL (index) & 3) == 0);
8135
8136 if (arm_address_register_rtx_p (index, strict_p)
8137 && (GET_MODE_SIZE (mode) <= 4))
8138 return 1;
8139
8140 if (mode == DImode || mode == DFmode)
8141 {
8142 if (code == CONST_INT)
8143 {
8144 HOST_WIDE_INT val = INTVAL (index);
8145 /* Thumb-2 ldrd only has reg+const addressing modes.
8146 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8147 If vldr is selected it uses arm_coproc_mem_operand. */
8148 if (TARGET_LDRD)
8149 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8150 else
8151 return IN_RANGE (val, -255, 4095 - 4);
8152 }
8153 else
8154 return 0;
8155 }
8156
8157 if (code == MULT)
8158 {
8159 rtx xiop0 = XEXP (index, 0);
8160 rtx xiop1 = XEXP (index, 1);
8161
8162 return ((arm_address_register_rtx_p (xiop0, strict_p)
8163 && thumb2_index_mul_operand (xiop1))
8164 || (arm_address_register_rtx_p (xiop1, strict_p)
8165 && thumb2_index_mul_operand (xiop0)));
8166 }
8167 else if (code == ASHIFT)
8168 {
8169 rtx op = XEXP (index, 1);
8170
8171 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8172 && CONST_INT_P (op)
8173 && INTVAL (op) > 0
8174 && INTVAL (op) <= 3);
8175 }
8176
8177 return (code == CONST_INT
8178 && INTVAL (index) < 4096
8179 && INTVAL (index) > -256);
8180 }
8181
8182 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8183 static int
8184 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8185 {
8186 int regno;
8187
8188 if (!REG_P (x))
8189 return 0;
8190
8191 regno = REGNO (x);
8192
8193 if (strict_p)
8194 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8195
8196 return (regno <= LAST_LO_REGNUM
8197 || regno > LAST_VIRTUAL_REGISTER
8198 || regno == FRAME_POINTER_REGNUM
8199 || (GET_MODE_SIZE (mode) >= 4
8200 && (regno == STACK_POINTER_REGNUM
8201 || regno >= FIRST_PSEUDO_REGISTER
8202 || x == hard_frame_pointer_rtx
8203 || x == arg_pointer_rtx)));
8204 }
8205
8206 /* Return nonzero if x is a legitimate index register. This is the case
8207 for any base register that can access a QImode object. */
8208 inline static int
8209 thumb1_index_register_rtx_p (rtx x, int strict_p)
8210 {
8211 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8212 }
8213
8214 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8215
8216 The AP may be eliminated to either the SP or the FP, so we use the
8217 least common denominator, e.g. SImode, and offsets from 0 to 64.
8218
8219 ??? Verify whether the above is the right approach.
8220
8221 ??? Also, the FP may be eliminated to the SP, so perhaps that
8222 needs special handling also.
8223
8224 ??? Look at how the mips16 port solves this problem. It probably uses
8225 better ways to solve some of these problems.
8226
8227 Although it is not incorrect, we don't accept QImode and HImode
8228 addresses based on the frame pointer or arg pointer until the
8229 reload pass starts. This is so that eliminating such addresses
8230 into stack based ones won't produce impossible code. */
8231 int
8232 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8233 {
8234 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8235 return 0;
8236
8237 /* ??? Not clear if this is right. Experiment. */
8238 if (GET_MODE_SIZE (mode) < 4
8239 && !(reload_in_progress || reload_completed)
8240 && (reg_mentioned_p (frame_pointer_rtx, x)
8241 || reg_mentioned_p (arg_pointer_rtx, x)
8242 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8243 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8244 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8245 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8246 return 0;
8247
8248 /* Accept any base register. SP only in SImode or larger. */
8249 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8250 return 1;
8251
8252 /* This is PC relative data before arm_reorg runs. */
8253 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8254 && GET_CODE (x) == SYMBOL_REF
8255 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8256 return 1;
8257
8258 /* This is PC relative data after arm_reorg runs. */
8259 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8260 && reload_completed
8261 && (GET_CODE (x) == LABEL_REF
8262 || (GET_CODE (x) == CONST
8263 && GET_CODE (XEXP (x, 0)) == PLUS
8264 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8265 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8266 return 1;
8267
8268 /* Post-inc indexing only supported for SImode and larger. */
8269 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8270 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8271 return 1;
8272
8273 else if (GET_CODE (x) == PLUS)
8274 {
8275 /* REG+REG address can be any two index registers. */
8276 /* We disallow FRAME+REG addressing since we know that FRAME
8277 will be replaced with STACK, and SP relative addressing only
8278 permits SP+OFFSET. */
8279 if (GET_MODE_SIZE (mode) <= 4
8280 && XEXP (x, 0) != frame_pointer_rtx
8281 && XEXP (x, 1) != frame_pointer_rtx
8282 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8283 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8284 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8285 return 1;
8286
8287 /* REG+const has 5-7 bit offset for non-SP registers. */
8288 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8289 || XEXP (x, 0) == arg_pointer_rtx)
8290 && CONST_INT_P (XEXP (x, 1))
8291 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8292 return 1;
8293
8294 /* REG+const has 10-bit offset for SP, but only SImode and
8295 larger is supported. */
8296 /* ??? Should probably check for DI/DFmode overflow here
8297 just like GO_IF_LEGITIMATE_OFFSET does. */
8298 else if (REG_P (XEXP (x, 0))
8299 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8300 && GET_MODE_SIZE (mode) >= 4
8301 && CONST_INT_P (XEXP (x, 1))
8302 && INTVAL (XEXP (x, 1)) >= 0
8303 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8304 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8305 return 1;
8306
8307 else if (REG_P (XEXP (x, 0))
8308 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8309 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8310 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8311 && REGNO (XEXP (x, 0))
8312 <= LAST_VIRTUAL_POINTER_REGISTER))
8313 && GET_MODE_SIZE (mode) >= 4
8314 && CONST_INT_P (XEXP (x, 1))
8315 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8316 return 1;
8317 }
8318
8319 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8320 && GET_MODE_SIZE (mode) == 4
8321 && GET_CODE (x) == SYMBOL_REF
8322 && CONSTANT_POOL_ADDRESS_P (x)
8323 && ! (flag_pic
8324 && symbol_mentioned_p (get_pool_constant (x))
8325 && ! pcrel_constant_p (get_pool_constant (x))))
8326 return 1;
8327
8328 return 0;
8329 }
8330
8331 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8332 instruction of mode MODE. */
8333 int
8334 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8335 {
8336 switch (GET_MODE_SIZE (mode))
8337 {
8338 case 1:
8339 return val >= 0 && val < 32;
8340
8341 case 2:
8342 return val >= 0 && val < 64 && (val & 1) == 0;
8343
8344 default:
8345 return (val >= 0
8346 && (val + GET_MODE_SIZE (mode)) <= 128
8347 && (val & 3) == 0);
8348 }
8349 }
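
/* Descriptive note (an editor's addition): these ranges match the Thumb-1
   immediate-offset load/store encodings, e.g. 0..31 for byte accesses,
   0..62 in steps of 2 for halfwords, and 0..124 in steps of 4 for a mode
   of size 4.  */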
8350
8351 bool
8352 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8353 {
8354 if (TARGET_ARM)
8355 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8356 else if (TARGET_THUMB2)
8357 return thumb2_legitimate_address_p (mode, x, strict_p);
8358 else /* if (TARGET_THUMB1) */
8359 return thumb1_legitimate_address_p (mode, x, strict_p);
8360 }
8361
8362 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8363
8364 Given an rtx X being reloaded into a reg required to be
8365 in class CLASS, return the class of reg to actually use.
8366 In general this is just CLASS, but for the Thumb core registers and
8367 immediate constants we prefer a LO_REGS class or a subset. */
8368
8369 static reg_class_t
8370 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8371 {
8372 if (TARGET_32BIT)
8373 return rclass;
8374 else
8375 {
8376 if (rclass == GENERAL_REGS)
8377 return LO_REGS;
8378 else
8379 return rclass;
8380 }
8381 }
8382
8383 /* Build the SYMBOL_REF for __tls_get_addr. */
8384
8385 static GTY(()) rtx tls_get_addr_libfunc;
8386
8387 static rtx
8388 get_tls_get_addr (void)
8389 {
8390 if (!tls_get_addr_libfunc)
8391 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8392 return tls_get_addr_libfunc;
8393 }
8394
8395 rtx
8396 arm_load_tp (rtx target)
8397 {
8398 if (!target)
8399 target = gen_reg_rtx (SImode);
8400
8401 if (TARGET_HARD_TP)
8402 {
8403 /* Can return in any reg. */
8404 emit_insn (gen_load_tp_hard (target));
8405 }
8406 else
8407 {
8408 /* Always returned in r0. Immediately copy the result into a pseudo,
8409 otherwise other uses of r0 (e.g. setting up function arguments) may
8410 clobber the value. */
8411
8412 rtx tmp;
8413
8414 emit_insn (gen_load_tp_soft ());
8415
8416 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8417 emit_move_insn (target, tmp);
8418 }
8419 return target;
8420 }
8421
8422 static rtx
8423 load_tls_operand (rtx x, rtx reg)
8424 {
8425 rtx tmp;
8426
8427 if (reg == NULL_RTX)
8428 reg = gen_reg_rtx (SImode);
8429
8430 tmp = gen_rtx_CONST (SImode, x);
8431
8432 emit_move_insn (reg, tmp);
8433
8434 return reg;
8435 }
8436
8437 static rtx_insn *
8438 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8439 {
8440 rtx label, labelno, sum;
8441
8442 gcc_assert (reloc != TLS_DESCSEQ);
8443 start_sequence ();
8444
8445 labelno = GEN_INT (pic_labelno++);
8446 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8447 label = gen_rtx_CONST (VOIDmode, label);
8448
8449 sum = gen_rtx_UNSPEC (Pmode,
8450 gen_rtvec (4, x, GEN_INT (reloc), label,
8451 GEN_INT (TARGET_ARM ? 8 : 4)),
8452 UNSPEC_TLS);
8453 reg = load_tls_operand (sum, reg);
8454
8455 if (TARGET_ARM)
8456 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8457 else
8458 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8459
8460 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8461 LCT_PURE, /* LCT_CONST? */
8462 Pmode, reg, Pmode);
8463
8464 rtx_insn *insns = get_insns ();
8465 end_sequence ();
8466
8467 return insns;
8468 }
8469
8470 static rtx
8471 arm_tls_descseq_addr (rtx x, rtx reg)
8472 {
8473 rtx labelno = GEN_INT (pic_labelno++);
8474 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8475 rtx sum = gen_rtx_UNSPEC (Pmode,
8476 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8477 gen_rtx_CONST (VOIDmode, label),
8478 GEN_INT (!TARGET_ARM)),
8479 UNSPEC_TLS);
8480 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8481
8482 emit_insn (gen_tlscall (x, labelno));
8483 if (!reg)
8484 reg = gen_reg_rtx (SImode);
8485 else
8486 gcc_assert (REGNO (reg) != R0_REGNUM);
8487
8488 emit_move_insn (reg, reg0);
8489
8490 return reg;
8491 }
8492
8493 rtx
8494 legitimize_tls_address (rtx x, rtx reg)
8495 {
8496 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8497 rtx_insn *insns;
8498 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8499
8500 switch (model)
8501 {
8502 case TLS_MODEL_GLOBAL_DYNAMIC:
8503 if (TARGET_GNU2_TLS)
8504 {
8505 reg = arm_tls_descseq_addr (x, reg);
8506
8507 tp = arm_load_tp (NULL_RTX);
8508
8509 dest = gen_rtx_PLUS (Pmode, tp, reg);
8510 }
8511 else
8512 {
8513 /* Original scheme */
8514 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8515 dest = gen_reg_rtx (Pmode);
8516 emit_libcall_block (insns, dest, ret, x);
8517 }
8518 return dest;
8519
8520 case TLS_MODEL_LOCAL_DYNAMIC:
8521 if (TARGET_GNU2_TLS)
8522 {
8523 reg = arm_tls_descseq_addr (x, reg);
8524
8525 tp = arm_load_tp (NULL_RTX);
8526
8527 dest = gen_rtx_PLUS (Pmode, tp, reg);
8528 }
8529 else
8530 {
8531 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8532
8533 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8534 share the LDM result with other LD model accesses. */
8535 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8536 UNSPEC_TLS);
8537 dest = gen_reg_rtx (Pmode);
8538 emit_libcall_block (insns, dest, ret, eqv);
8539
8540 /* Load the addend. */
8541 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8542 GEN_INT (TLS_LDO32)),
8543 UNSPEC_TLS);
8544 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8545 dest = gen_rtx_PLUS (Pmode, dest, addend);
8546 }
8547 return dest;
8548
8549 case TLS_MODEL_INITIAL_EXEC:
8550 labelno = GEN_INT (pic_labelno++);
8551 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8552 label = gen_rtx_CONST (VOIDmode, label);
8553 sum = gen_rtx_UNSPEC (Pmode,
8554 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8555 GEN_INT (TARGET_ARM ? 8 : 4)),
8556 UNSPEC_TLS);
8557 reg = load_tls_operand (sum, reg);
8558
8559 if (TARGET_ARM)
8560 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8561 else if (TARGET_THUMB2)
8562 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8563 else
8564 {
8565 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8566 emit_move_insn (reg, gen_const_mem (SImode, reg));
8567 }
8568
8569 tp = arm_load_tp (NULL_RTX);
8570
8571 return gen_rtx_PLUS (Pmode, tp, reg);
8572
8573 case TLS_MODEL_LOCAL_EXEC:
8574 tp = arm_load_tp (NULL_RTX);
8575
8576 reg = gen_rtx_UNSPEC (Pmode,
8577 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8578 UNSPEC_TLS);
8579 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8580
8581 return gen_rtx_PLUS (Pmode, tp, reg);
8582
8583 default:
8584 abort ();
8585 }
8586 }
8587
8588 /* Try machine-dependent ways of modifying an illegitimate address
8589 to be legitimate. If we find one, return the new, valid address. */
8590 rtx
8591 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8592 {
8593 if (arm_tls_referenced_p (x))
8594 {
8595 rtx addend = NULL;
8596
8597 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8598 {
8599 addend = XEXP (XEXP (x, 0), 1);
8600 x = XEXP (XEXP (x, 0), 0);
8601 }
8602
8603 if (GET_CODE (x) != SYMBOL_REF)
8604 return x;
8605
8606 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8607
8608 x = legitimize_tls_address (x, NULL_RTX);
8609
8610 if (addend)
8611 {
8612 x = gen_rtx_PLUS (SImode, x, addend);
8613 orig_x = x;
8614 }
8615 else
8616 return x;
8617 }
8618
8619 if (!TARGET_ARM)
8620 {
8621 /* TODO: legitimize_address for Thumb2. */
8622 if (TARGET_THUMB2)
8623 return x;
8624 return thumb_legitimize_address (x, orig_x, mode);
8625 }
8626
8627 if (GET_CODE (x) == PLUS)
8628 {
8629 rtx xop0 = XEXP (x, 0);
8630 rtx xop1 = XEXP (x, 1);
8631
8632 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8633 xop0 = force_reg (SImode, xop0);
8634
8635 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8636 && !symbol_mentioned_p (xop1))
8637 xop1 = force_reg (SImode, xop1);
8638
8639 if (ARM_BASE_REGISTER_RTX_P (xop0)
8640 && CONST_INT_P (xop1))
8641 {
8642 HOST_WIDE_INT n, low_n;
8643 rtx base_reg, val;
8644 n = INTVAL (xop1);
8645
8646 /* VFP addressing modes actually allow greater offsets, but for
8647 now we just stick with the lowest common denominator. */
8648 if (mode == DImode || mode == DFmode)
8649 {
8650 low_n = n & 0x0f;
8651 n &= ~0x0f;
8652 if (low_n > 4)
8653 {
8654 n += 16;
8655 low_n -= 16;
8656 }
8657 }
8658 else
8659 {
8660 low_n = ((mode) == TImode ? 0
8661 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8662 n -= low_n;
8663 }
8664
8665 base_reg = gen_reg_rtx (SImode);
8666 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8667 emit_move_insn (base_reg, val);
8668 x = plus_constant (Pmode, base_reg, low_n);
8669 }
8670 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8671 x = gen_rtx_PLUS (SImode, xop0, xop1);
8672 }
8673
8674 /* XXX We don't allow MINUS any more -- see comment in
8675 arm_legitimate_address_outer_p (). */
8676 else if (GET_CODE (x) == MINUS)
8677 {
8678 rtx xop0 = XEXP (x, 0);
8679 rtx xop1 = XEXP (x, 1);
8680
8681 if (CONSTANT_P (xop0))
8682 xop0 = force_reg (SImode, xop0);
8683
8684 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8685 xop1 = force_reg (SImode, xop1);
8686
8687 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8688 x = gen_rtx_MINUS (SImode, xop0, xop1);
8689 }
8690
8691 /* Make sure to take full advantage of the pre-indexed addressing mode
8692 with absolute addresses, which often allows the base register to be
8693 shared between multiple adjacent memory references, and might even
8694 allow the minipool to be avoided entirely. */
8695 else if (CONST_INT_P (x) && optimize > 0)
8696 {
8697 unsigned int bits;
8698 HOST_WIDE_INT mask, base, index;
8699 rtx base_reg;
8700
8701 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8702 use an 8-bit index.  So let's use a 12-bit index for SImode only and
8703 hope that arm_gen_constant will enable ldrb to use more bits. */
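/* Illustrative example: for an SImode access to the absolute address
   0x1234, mask is 0xfff, so base becomes 0x1000 and index 0x234; only
   the base is forced into a register and the index is folded into the
   addressing mode.  */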
8704 bits = (mode == SImode) ? 12 : 8;
8705 mask = (1 << bits) - 1;
8706 base = INTVAL (x) & ~mask;
8707 index = INTVAL (x) & mask;
8708 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8709 {
8710 /* It'll most probably be more efficient to generate the base
8711 with more bits set and use a negative index instead. */
8712 base |= mask;
8713 index -= mask;
8714 }
8715 base_reg = force_reg (SImode, GEN_INT (base));
8716 x = plus_constant (Pmode, base_reg, index);
8717 }
8718
8719 if (flag_pic)
8720 {
8721 /* We need to find and carefully transform any SYMBOL and LABEL
8722 references, so go back to the original address expression. */
8723 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8724
8725 if (new_x != orig_x)
8726 x = new_x;
8727 }
8728
8729 return x;
8730 }
8731
8732
8733 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8734 to be legitimate. If we find one, return the new, valid address. */
8735 rtx
8736 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8737 {
8738 if (GET_CODE (x) == PLUS
8739 && CONST_INT_P (XEXP (x, 1))
8740 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8741 || INTVAL (XEXP (x, 1)) < 0))
8742 {
8743 rtx xop0 = XEXP (x, 0);
8744 rtx xop1 = XEXP (x, 1);
8745 HOST_WIDE_INT offset = INTVAL (xop1);
8746
8747 /* Try to fold the offset into a bias of the base register and
8748 then offset from that.  Don't do this when optimizing for space,
8749 since it can cause too many CSEs. */
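/* Worked example (illustrative, under optimize_size): for an SImode
   access at base + 300, delta becomes 300 - 252 = 48, so the base is
   biased by 252 and the remaining offset of 48 fits the Thumb-1 5-bit
   scaled immediate range (0..124).  */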
8750 if (optimize_size && offset >= 0
8751 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8752 {
8753 HOST_WIDE_INT delta;
8754
8755 if (offset >= 256)
8756 delta = offset - (256 - GET_MODE_SIZE (mode));
8757 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8758 delta = 31 * GET_MODE_SIZE (mode);
8759 else
8760 delta = offset & (~31 * GET_MODE_SIZE (mode));
8761
8762 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8763 NULL_RTX);
8764 x = plus_constant (Pmode, xop0, delta);
8765 }
8766 else if (offset < 0 && offset > -256)
8767 /* Small negative offsets are best done with a subtract before the
8768 dereference; forcing these into a register normally takes two
8769 instructions. */
8770 x = force_operand (x, NULL_RTX);
8771 else
8772 {
8773 /* For the remaining cases, force the constant into a register. */
8774 xop1 = force_reg (SImode, xop1);
8775 x = gen_rtx_PLUS (SImode, xop0, xop1);
8776 }
8777 }
8778 else if (GET_CODE (x) == PLUS
8779 && s_register_operand (XEXP (x, 1), SImode)
8780 && !s_register_operand (XEXP (x, 0), SImode))
8781 {
8782 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8783
8784 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8785 }
8786
8787 if (flag_pic)
8788 {
8789 /* We need to find and carefully transform any SYMBOL and LABEL
8790 references, so go back to the original address expression. */
8791 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8792
8793 if (new_x != orig_x)
8794 x = new_x;
8795 }
8796
8797 return x;
8798 }
8799
8800 /* Return TRUE if X contains any TLS symbol references. */
8801
8802 bool
8803 arm_tls_referenced_p (rtx x)
8804 {
8805 if (! TARGET_HAVE_TLS)
8806 return false;
8807
8808 subrtx_iterator::array_type array;
8809 FOR_EACH_SUBRTX (iter, array, x, ALL)
8810 {
8811 const_rtx x = *iter;
8812 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8813 {
8814 /* ARM currently does not provide relocations to encode TLS variables
8815 into AArch32 instructions, only data, so there is currently no way
8816 to implement these if a literal pool is disabled. */
8817 if (arm_disable_literal_pool)
8818 sorry ("accessing thread-local storage is not currently supported "
8819 "with -mpure-code or -mslow-flash-data");
8820
8821 return true;
8822 }
8823
8824 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8825 TLS offsets, not real symbol references. */
8826 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8827 iter.skip_subrtxes ();
8828 }
8829 return false;
8830 }
8831
8832 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8833
8834 On the ARM, allow any integer (invalid ones are removed later by insn
8835 patterns), nice doubles and symbol_refs which refer to the function's
8836 constant pool XXX.
8837
8838 When generating PIC, allow anything. */
8839
8840 static bool
8841 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8842 {
8843 return flag_pic || !label_mentioned_p (x);
8844 }
8845
8846 static bool
8847 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8848 {
8849 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8850 RTXs.  These RTXs must therefore be allowed for Thumb-1 so that the result
8851 is valid when compiling for ARMv8-M Baseline or later. */
8852 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8853 x = XEXP (x, 0);
8854
8855 return (CONST_INT_P (x)
8856 || CONST_DOUBLE_P (x)
8857 || CONSTANT_ADDRESS_P (x)
8858 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8859 || flag_pic);
8860 }
8861
8862 static bool
8863 arm_legitimate_constant_p (machine_mode mode, rtx x)
8864 {
8865 return (!arm_cannot_force_const_mem (mode, x)
8866 && (TARGET_32BIT
8867 ? arm_legitimate_constant_p_1 (mode, x)
8868 : thumb_legitimate_constant_p (mode, x)));
8869 }
8870
8871 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8872
8873 static bool
8874 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8875 {
8876 rtx base, offset;
8877
8878 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8879 {
8880 split_const (x, &base, &offset);
8881 if (GET_CODE (base) == SYMBOL_REF
8882 && !offset_within_block_p (base, INTVAL (offset)))
8883 return true;
8884 }
8885 return arm_tls_referenced_p (x);
8886 }
8887 \f
8888 #define REG_OR_SUBREG_REG(X) \
8889 (REG_P (X) \
8890 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8891
8892 #define REG_OR_SUBREG_RTX(X) \
8893 (REG_P (X) ? (X) : SUBREG_REG (X))
8894
8895 static inline int
8896 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8897 {
8898 machine_mode mode = GET_MODE (x);
8899 int total, words;
8900
8901 switch (code)
8902 {
8903 case ASHIFT:
8904 case ASHIFTRT:
8905 case LSHIFTRT:
8906 case ROTATERT:
8907 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8908
8909 case PLUS:
8910 case MINUS:
8911 case COMPARE:
8912 case NEG:
8913 case NOT:
8914 return COSTS_N_INSNS (1);
8915
8916 case MULT:
8917 if (arm_arch6m && arm_m_profile_small_mul)
8918 return COSTS_N_INSNS (32);
8919
8920 if (CONST_INT_P (XEXP (x, 1)))
8921 {
8922 int cycles = 0;
8923 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8924
8925 while (i)
8926 {
8927 i >>= 2;
8928 cycles++;
8929 }
8930 return COSTS_N_INSNS (2) + cycles;
8931 }
8932 return COSTS_N_INSNS (1) + 16;
8933
8934 case SET:
8935 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8936 the mode. */
8937 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8938 return (COSTS_N_INSNS (words)
8939 + 4 * ((MEM_P (SET_SRC (x)))
8940 + MEM_P (SET_DEST (x))));
8941
8942 case CONST_INT:
8943 if (outer == SET)
8944 {
8945 if (UINTVAL (x) < 256
8946 /* 16-bit constant. */
8947 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8948 return 0;
8949 if (thumb_shiftable_const (INTVAL (x)))
8950 return COSTS_N_INSNS (2);
8951 return COSTS_N_INSNS (3);
8952 }
8953 else if ((outer == PLUS || outer == COMPARE)
8954 && INTVAL (x) < 256 && INTVAL (x) > -256)
8955 return 0;
8956 else if ((outer == IOR || outer == XOR || outer == AND)
8957 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8958 return COSTS_N_INSNS (1);
8959 else if (outer == AND)
8960 {
8961 int i;
8962 /* This duplicates the tests in the andsi3 expander. */
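/* Such a mask, of the form (1 << i) - 1 or its complement, can
   typically be applied with a shift-left/shift-right pair, hence the
   two-insn cost below.  */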
8963 for (i = 9; i <= 31; i++)
8964 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8965 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8966 return COSTS_N_INSNS (2);
8967 }
8968 else if (outer == ASHIFT || outer == ASHIFTRT
8969 || outer == LSHIFTRT)
8970 return 0;
8971 return COSTS_N_INSNS (2);
8972
8973 case CONST:
8974 case CONST_DOUBLE:
8975 case LABEL_REF:
8976 case SYMBOL_REF:
8977 return COSTS_N_INSNS (3);
8978
8979 case UDIV:
8980 case UMOD:
8981 case DIV:
8982 case MOD:
8983 return 100;
8984
8985 case TRUNCATE:
8986 return 99;
8987
8988 case AND:
8989 case XOR:
8990 case IOR:
8991 /* XXX guess. */
8992 return 8;
8993
8994 case MEM:
8995 /* XXX another guess. */
8996 /* Memory costs quite a lot for the first word, but subsequent words
8997 load at the equivalent of a single insn each. */
8998 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8999 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9000 ? 4 : 0));
9001
9002 case IF_THEN_ELSE:
9003 /* XXX a guess. */
9004 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9005 return 14;
9006 return 2;
9007
9008 case SIGN_EXTEND:
9009 case ZERO_EXTEND:
9010 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9011 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9012
9013 if (mode == SImode)
9014 return total;
9015
9016 if (arm_arch6)
9017 return total + COSTS_N_INSNS (1);
9018
9019 /* Assume a two-shift sequence. Increase the cost slightly so
9020 we prefer actual shifts over an extend operation. */
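/* For example, without sxtb a QImode sign extension is typically done
   as lsls rD, rS, #24 followed by asrs rD, rD, #24.  */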
9021 return total + 1 + COSTS_N_INSNS (2);
9022
9023 default:
9024 return 99;
9025 }
9026 }
9027
9028 /* Estimate the size cost of Thumb-1 instructions.
9029 For now most of the code is copied from thumb1_rtx_costs; we need
9030 finer-grained tuning when we have more related test cases. */
9031 static inline int
9032 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9033 {
9034 machine_mode mode = GET_MODE (x);
9035 int words, cost;
9036
9037 switch (code)
9038 {
9039 case ASHIFT:
9040 case ASHIFTRT:
9041 case LSHIFTRT:
9042 case ROTATERT:
9043 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9044
9045 case PLUS:
9046 case MINUS:
9047 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9048 patterns defined by RTL expansion, especially for the expansion of
9049 multiplication. */
9050 if ((GET_CODE (XEXP (x, 0)) == MULT
9051 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9052 || (GET_CODE (XEXP (x, 1)) == MULT
9053 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9054 return COSTS_N_INSNS (2);
9055 /* Fall through. */
9056 case COMPARE:
9057 case NEG:
9058 case NOT:
9059 return COSTS_N_INSNS (1);
9060
9061 case MULT:
9062 if (CONST_INT_P (XEXP (x, 1)))
9063 {
9064 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9065 into a register first. */
9066 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9067 /* For targets that have a small, high-latency multiply unit, we prefer
9068 to synthesize the multiply with up to 5 instructions, giving a good
9069 balance between size and performance. */
9070 if (arm_arch6m && arm_m_profile_small_mul)
9071 return COSTS_N_INSNS (5);
9072 else
9073 return COSTS_N_INSNS (1) + const_size;
9074 }
9075 return COSTS_N_INSNS (1);
9076
9077 case SET:
9078 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9079 the mode. */
9080 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9081 cost = COSTS_N_INSNS (words);
9082 if (satisfies_constraint_J (SET_SRC (x))
9083 || satisfies_constraint_K (SET_SRC (x))
9084 /* Too big an immediate for a 2-byte mov, using MOVT. */
9085 || (CONST_INT_P (SET_SRC (x))
9086 && UINTVAL (SET_SRC (x)) >= 256
9087 && TARGET_HAVE_MOVT
9088 && satisfies_constraint_j (SET_SRC (x)))
9089 /* thumb1_movdi_insn. */
9090 || ((words > 1) && MEM_P (SET_SRC (x))))
9091 cost += COSTS_N_INSNS (1);
9092 return cost;
9093
9094 case CONST_INT:
9095 if (outer == SET)
9096 {
9097 if (UINTVAL (x) < 256)
9098 return COSTS_N_INSNS (1);
9099 /* movw is 4 bytes long. */
9100 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9101 return COSTS_N_INSNS (2);
9102 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9103 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9104 return COSTS_N_INSNS (2);
9105 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9106 if (thumb_shiftable_const (INTVAL (x)))
9107 return COSTS_N_INSNS (2);
9108 return COSTS_N_INSNS (3);
9109 }
9110 else if ((outer == PLUS || outer == COMPARE)
9111 && INTVAL (x) < 256 && INTVAL (x) > -256)
9112 return 0;
9113 else if ((outer == IOR || outer == XOR || outer == AND)
9114 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9115 return COSTS_N_INSNS (1);
9116 else if (outer == AND)
9117 {
9118 int i;
9119 /* This duplicates the tests in the andsi3 expander. */
9120 for (i = 9; i <= 31; i++)
9121 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9122 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9123 return COSTS_N_INSNS (2);
9124 }
9125 else if (outer == ASHIFT || outer == ASHIFTRT
9126 || outer == LSHIFTRT)
9127 return 0;
9128 return COSTS_N_INSNS (2);
9129
9130 case CONST:
9131 case CONST_DOUBLE:
9132 case LABEL_REF:
9133 case SYMBOL_REF:
9134 return COSTS_N_INSNS (3);
9135
9136 case UDIV:
9137 case UMOD:
9138 case DIV:
9139 case MOD:
9140 return 100;
9141
9142 case TRUNCATE:
9143 return 99;
9144
9145 case AND:
9146 case XOR:
9147 case IOR:
9148 return COSTS_N_INSNS (1);
9149
9150 case MEM:
9151 return (COSTS_N_INSNS (1)
9152 + COSTS_N_INSNS (1)
9153 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9154 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9155 ? COSTS_N_INSNS (1) : 0));
9156
9157 case IF_THEN_ELSE:
9158 /* XXX a guess. */
9159 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9160 return 14;
9161 return 2;
9162
9163 case ZERO_EXTEND:
9164 /* XXX still guessing. */
9165 switch (GET_MODE (XEXP (x, 0)))
9166 {
9167 case E_QImode:
9168 return (1 + (mode == DImode ? 4 : 0)
9169 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9170
9171 case E_HImode:
9172 return (4 + (mode == DImode ? 4 : 0)
9173 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9174
9175 case E_SImode:
9176 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9177
9178 default:
9179 return 99;
9180 }
9181
9182 default:
9183 return 99;
9184 }
9185 }
9186
9187 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9188 operand, then return the operand that is being shifted.  If the shift
9189 is not by a constant amount, then set *SHIFT_REG to point to the operand
9190 holding the shift amount.  Return NULL if OP is not a shifter operand. */
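/* For example, (mult (reg X) (const_int 4)) is a left shift by 2 in
   disguise, so (reg X) is returned; for (ashift (reg X) (reg Y)),
   (reg X) is returned and *SHIFT_REG is set to (reg Y).  */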
9191 static rtx
9192 shifter_op_p (rtx op, rtx *shift_reg)
9193 {
9194 enum rtx_code code = GET_CODE (op);
9195
9196 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9197 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9198 return XEXP (op, 0);
9199 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9200 return XEXP (op, 0);
9201 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9202 || code == ASHIFTRT)
9203 {
9204 if (!CONST_INT_P (XEXP (op, 1)))
9205 *shift_reg = XEXP (op, 1);
9206 return XEXP (op, 0);
9207 }
9208
9209 return NULL;
9210 }
9211
9212 static bool
9213 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9214 {
9215 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9216 rtx_code code = GET_CODE (x);
9217 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9218
9219 switch (XINT (x, 1))
9220 {
9221 case UNSPEC_UNALIGNED_LOAD:
9222 /* We can only do unaligned loads into the integer unit, and we can't
9223 use LDM or LDRD. */
9224 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9225 if (speed_p)
9226 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9227 + extra_cost->ldst.load_unaligned);
9228
9229 #ifdef NOT_YET
9230 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9231 ADDR_SPACE_GENERIC, speed_p);
9232 #endif
9233 return true;
9234
9235 case UNSPEC_UNALIGNED_STORE:
9236 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9237 if (speed_p)
9238 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9239 + extra_cost->ldst.store_unaligned);
9240
9241 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9242 #ifdef NOT_YET
9243 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9244 ADDR_SPACE_GENERIC, speed_p);
9245 #endif
9246 return true;
9247
9248 case UNSPEC_VRINTZ:
9249 case UNSPEC_VRINTP:
9250 case UNSPEC_VRINTM:
9251 case UNSPEC_VRINTR:
9252 case UNSPEC_VRINTX:
9253 case UNSPEC_VRINTA:
9254 if (speed_p)
9255 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9256
9257 return true;
9258 default:
9259 *cost = COSTS_N_INSNS (2);
9260 break;
9261 }
9262 return true;
9263 }
9264
9265 /* Cost of a libcall. We assume one insn per argument, an amount for the
9266 call (one insn for -Os) and then one for processing the result. */
9267 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
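/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for
   size.  */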
9268
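/* If operand IDX of X is a left-shift shifter operand, add the cost of
   an OP with a shifted operand (plus the shift amount when it is held
   in a register) and return true from the enclosing cost function.
   Used by the narrow-mode PLUS and MINUS cases below.  */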
9269 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9270 do \
9271 { \
9272 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9273 if (shift_op != NULL \
9274 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9275 { \
9276 if (shift_reg) \
9277 { \
9278 if (speed_p) \
9279 *cost += extra_cost->alu.arith_shift_reg; \
9280 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9281 ASHIFT, 1, speed_p); \
9282 } \
9283 else if (speed_p) \
9284 *cost += extra_cost->alu.arith_shift; \
9285 \
9286 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9287 ASHIFT, 0, speed_p) \
9288 + rtx_cost (XEXP (x, 1 - IDX), \
9289 GET_MODE (shift_op), \
9290 OP, 1, speed_p)); \
9291 return true; \
9292 } \
9293 } \
9294 while (0)
9295
9296 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9297 considering the costs of the addressing mode and memory access
9298 separately. */
9299 static bool
9300 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9301 int *cost, bool speed_p)
9302 {
9303 machine_mode mode = GET_MODE (x);
9304
9305 *cost = COSTS_N_INSNS (1);
9306
9307 if (flag_pic
9308 && GET_CODE (XEXP (x, 0)) == PLUS
9309 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9310 /* This will be split into two instructions. Add the cost of the
9311 additional instruction here. The cost of the memory access is computed
9312 below. See arm.md:calculate_pic_address. */
9313 *cost += COSTS_N_INSNS (1);
9314
9315 /* Calculate cost of the addressing mode. */
9316 if (speed_p)
9317 {
9318 arm_addr_mode_op op_type;
9319 switch (GET_CODE (XEXP (x, 0)))
9320 {
9321 default:
9322 case REG:
9323 op_type = AMO_DEFAULT;
9324 break;
9325 case MINUS:
9326 /* MINUS does not appear in RTL, but the architecture supports it,
9327 so handle this case defensively. */
9328 /* fall through */
9329 case PLUS:
9330 op_type = AMO_NO_WB;
9331 break;
9332 case PRE_INC:
9333 case PRE_DEC:
9334 case POST_INC:
9335 case POST_DEC:
9336 case PRE_MODIFY:
9337 case POST_MODIFY:
9338 op_type = AMO_WB;
9339 break;
9340 }
9341
9342 if (VECTOR_MODE_P (mode))
9343 *cost += current_tune->addr_mode_costs->vector[op_type];
9344 else if (FLOAT_MODE_P (mode))
9345 *cost += current_tune->addr_mode_costs->fp[op_type];
9346 else
9347 *cost += current_tune->addr_mode_costs->integer[op_type];
9348 }
9349
9350 /* Calculate cost of memory access. */
9351 if (speed_p)
9352 {
9353 if (FLOAT_MODE_P (mode))
9354 {
9355 if (GET_MODE_SIZE (mode) == 8)
9356 *cost += extra_cost->ldst.loadd;
9357 else
9358 *cost += extra_cost->ldst.loadf;
9359 }
9360 else if (VECTOR_MODE_P (mode))
9361 *cost += extra_cost->ldst.loadv;
9362 else
9363 {
9364 /* Integer modes */
9365 if (GET_MODE_SIZE (mode) == 8)
9366 *cost += extra_cost->ldst.ldrd;
9367 else
9368 *cost += extra_cost->ldst.load;
9369 }
9370 }
9371
9372 return true;
9373 }
9374
9375 /* RTX costs. Make an estimate of the cost of executing the operation
9376 X, which is contained within an operation with code OUTER_CODE.
9377 SPEED_P indicates whether the cost desired is the performance cost,
9378 or the size cost. The estimate is stored in COST and the return
9379 value is TRUE if the cost calculation is final, or FALSE if the
9380 caller should recurse through the operands of X to add additional
9381 costs.
9382
9383 We currently make no attempt to model the size savings of Thumb-2
9384 16-bit instructions. At the normal points in compilation where
9385 this code is called we have no measure of whether the condition
9386 flags are live or not, and thus no realistic way to determine what
9387 the size will eventually be. */
9388 static bool
9389 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9390 const struct cpu_cost_table *extra_cost,
9391 int *cost, bool speed_p)
9392 {
9393 machine_mode mode = GET_MODE (x);
9394
9395 *cost = COSTS_N_INSNS (1);
9396
9397 if (TARGET_THUMB1)
9398 {
9399 if (speed_p)
9400 *cost = thumb1_rtx_costs (x, code, outer_code);
9401 else
9402 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9403 return true;
9404 }
9405
9406 switch (code)
9407 {
9408 case SET:
9409 *cost = 0;
9410 /* SET RTXs don't have a mode so we get it from the destination. */
9411 mode = GET_MODE (SET_DEST (x));
9412
9413 if (REG_P (SET_SRC (x))
9414 && REG_P (SET_DEST (x)))
9415 {
9416 /* Assume that most copies can be done with a single insn,
9417 unless we don't have HW FP, in which case everything
9418 larger than word mode will require two insns. */
9419 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9420 && GET_MODE_SIZE (mode) > 4)
9421 || mode == DImode)
9422 ? 2 : 1);
9423 /* Conditional register moves can be encoded
9424 in 16 bits in Thumb mode. */
9425 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9426 *cost >>= 1;
9427
9428 return true;
9429 }
9430
9431 if (CONST_INT_P (SET_SRC (x)))
9432 {
9433 /* Handle CONST_INT here, since the value doesn't have a mode
9434 and we would otherwise be unable to work out the true cost. */
9435 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9436 0, speed_p);
9437 outer_code = SET;
9438 /* Slightly lower the cost of setting a core reg to a constant.
9439 This helps break up chains and allows for better scheduling. */
9440 if (REG_P (SET_DEST (x))
9441 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9442 *cost -= 1;
9443 x = SET_SRC (x);
9444 /* Immediate moves with an immediate in the range [0, 255] can be
9445 encoded in 16 bits in Thumb mode. */
9446 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9447 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9448 *cost >>= 1;
9449 goto const_int_cost;
9450 }
9451
9452 return false;
9453
9454 case MEM:
9455 return arm_mem_costs (x, extra_cost, cost, speed_p);
9456
9457 case PARALLEL:
9458 {
9459 /* Calculations of LDM costs are complex. We assume an initial cost
9460 (ldm_1st) which will load the number of registers mentioned in
9461 ldm_regs_per_insn_1st registers; then each additional
9462 ldm_regs_per_insn_subsequent registers cost one more insn. The
9463 formula for N regs is thus:
9464
9465 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9466 + ldm_regs_per_insn_subsequent - 1)
9467 / ldm_regs_per_insn_subsequent).
9468
9469 Additional costs may also be added for addressing. A similar
9470 formula is used for STM. */
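/* For instance, with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2 (illustrative values), loading 5
   registers costs ldm_1st + COSTS_N_INSNS ((3 + 1) / 2), i.e. two
   additional insns.  */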
9471
9472 bool is_ldm = load_multiple_operation (x, SImode);
9473 bool is_stm = store_multiple_operation (x, SImode);
9474
9475 if (is_ldm || is_stm)
9476 {
9477 if (speed_p)
9478 {
9479 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9480 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9481 ? extra_cost->ldst.ldm_regs_per_insn_1st
9482 : extra_cost->ldst.stm_regs_per_insn_1st;
9483 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9484 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9485 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9486
9487 *cost += regs_per_insn_1st
9488 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9489 + regs_per_insn_sub - 1)
9490 / regs_per_insn_sub);
9491 return true;
9492 }
9493
9494 }
9495 return false;
9496 }
9497 case DIV:
9498 case UDIV:
9499 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9500 && (mode == SFmode || !TARGET_VFP_SINGLE))
9501 *cost += COSTS_N_INSNS (speed_p
9502 ? extra_cost->fp[mode != SFmode].div : 0);
9503 else if (mode == SImode && TARGET_IDIV)
9504 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9505 else
9506 *cost = LIBCALL_COST (2);
9507
9508 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
9509 possible, udiv is preferred. */
9510 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9511 return false; /* All arguments must be in registers. */
9512
9513 case MOD:
9514 /* MOD by a power of 2 can be expanded as:
9515 rsbs r1, r0, #0
9516 and r0, r0, #(n - 1)
9517 and r1, r1, #(n - 1)
9518 rsbpl r0, r1, #0. */
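/* E.g. for x % 4, n - 1 is 3, and the whole sequence is four
   branchless insns.  */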
9519 if (CONST_INT_P (XEXP (x, 1))
9520 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9521 && mode == SImode)
9522 {
9523 *cost += COSTS_N_INSNS (3);
9524
9525 if (speed_p)
9526 *cost += 2 * extra_cost->alu.logical
9527 + extra_cost->alu.arith;
9528 return true;
9529 }
9530
9531 /* Fall-through. */
9532 case UMOD:
9533 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
9534 possible, udiv is preferred. */
9535 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9536 return false; /* All arguments must be in registers. */
9537
9538 case ROTATE:
9539 if (mode == SImode && REG_P (XEXP (x, 1)))
9540 {
9541 *cost += (COSTS_N_INSNS (1)
9542 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9543 if (speed_p)
9544 *cost += extra_cost->alu.shift_reg;
9545 return true;
9546 }
9547 /* Fall through */
9548 case ROTATERT:
9549 case ASHIFT:
9550 case LSHIFTRT:
9551 case ASHIFTRT:
9552 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9553 {
9554 *cost += (COSTS_N_INSNS (2)
9555 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9556 if (speed_p)
9557 *cost += 2 * extra_cost->alu.shift;
9558 /* Slightly disparage a left shift by 1 so that we prefer adddi3. */
9559 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9560 *cost += 1;
9561 return true;
9562 }
9563 else if (mode == SImode)
9564 {
9565 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9566 /* Slightly disparage register shifts at -Os, but not by much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9569 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9570 return true;
9571 }
9572 else if (GET_MODE_CLASS (mode) == MODE_INT
9573 && GET_MODE_SIZE (mode) < 4)
9574 {
9575 if (code == ASHIFT)
9576 {
9577 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9578 /* Slightly disparage register shifts at -Os, but not by
9579 much. */
9580 if (!CONST_INT_P (XEXP (x, 1)))
9581 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9582 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9583 }
9584 else if (code == LSHIFTRT || code == ASHIFTRT)
9585 {
9586 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9587 {
9588 /* Can use SBFX/UBFX. */
9589 if (speed_p)
9590 *cost += extra_cost->alu.bfx;
9591 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9592 }
9593 else
9594 {
9595 *cost += COSTS_N_INSNS (1);
9596 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9597 if (speed_p)
9598 {
9599 if (CONST_INT_P (XEXP (x, 1)))
9600 *cost += 2 * extra_cost->alu.shift;
9601 else
9602 *cost += (extra_cost->alu.shift
9603 + extra_cost->alu.shift_reg);
9604 }
9605 else
9606 /* Slightly disparage register shifts. */
9607 *cost += !CONST_INT_P (XEXP (x, 1));
9608 }
9609 }
9610 else /* Rotates. */
9611 {
9612 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9613 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9614 if (speed_p)
9615 {
9616 if (CONST_INT_P (XEXP (x, 1)))
9617 *cost += (2 * extra_cost->alu.shift
9618 + extra_cost->alu.log_shift);
9619 else
9620 *cost += (extra_cost->alu.shift
9621 + extra_cost->alu.shift_reg
9622 + extra_cost->alu.log_shift_reg);
9623 }
9624 }
9625 return true;
9626 }
9627
9628 *cost = LIBCALL_COST (2);
9629 return false;
9630
9631 case BSWAP:
9632 if (arm_arch6)
9633 {
9634 if (mode == SImode)
9635 {
9636 if (speed_p)
9637 *cost += extra_cost->alu.rev;
9638
9639 return false;
9640 }
9641 }
9642 else
9643 {
9644 /* No rev instruction available. Look at arm_legacy_rev
9645 and thumb_legacy_rev for the form of RTL used then. */
9646 if (TARGET_THUMB)
9647 {
9648 *cost += COSTS_N_INSNS (9);
9649
9650 if (speed_p)
9651 {
9652 *cost += 6 * extra_cost->alu.shift;
9653 *cost += 3 * extra_cost->alu.logical;
9654 }
9655 }
9656 else
9657 {
9658 *cost += COSTS_N_INSNS (4);
9659
9660 if (speed_p)
9661 {
9662 *cost += 2 * extra_cost->alu.shift;
9663 *cost += extra_cost->alu.arith_shift;
9664 *cost += 2 * extra_cost->alu.logical;
9665 }
9666 }
9667 return true;
9668 }
9669 return false;
9670
9671 case MINUS:
9672 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9673 && (mode == SFmode || !TARGET_VFP_SINGLE))
9674 {
9675 if (GET_CODE (XEXP (x, 0)) == MULT
9676 || GET_CODE (XEXP (x, 1)) == MULT)
9677 {
9678 rtx mul_op0, mul_op1, sub_op;
9679
9680 if (speed_p)
9681 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9682
9683 if (GET_CODE (XEXP (x, 0)) == MULT)
9684 {
9685 mul_op0 = XEXP (XEXP (x, 0), 0);
9686 mul_op1 = XEXP (XEXP (x, 0), 1);
9687 sub_op = XEXP (x, 1);
9688 }
9689 else
9690 {
9691 mul_op0 = XEXP (XEXP (x, 1), 0);
9692 mul_op1 = XEXP (XEXP (x, 1), 1);
9693 sub_op = XEXP (x, 0);
9694 }
9695
9696 /* The first operand of the multiply may be optionally
9697 negated. */
9698 if (GET_CODE (mul_op0) == NEG)
9699 mul_op0 = XEXP (mul_op0, 0);
9700
9701 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9702 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9703 + rtx_cost (sub_op, mode, code, 0, speed_p));
9704
9705 return true;
9706 }
9707
9708 if (speed_p)
9709 *cost += extra_cost->fp[mode != SFmode].addsub;
9710 return false;
9711 }
9712
9713 if (mode == SImode)
9714 {
9715 rtx shift_by_reg = NULL;
9716 rtx shift_op;
9717 rtx non_shift_op;
9718
9719 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9720 if (shift_op == NULL)
9721 {
9722 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9723 non_shift_op = XEXP (x, 0);
9724 }
9725 else
9726 non_shift_op = XEXP (x, 1);
9727
9728 if (shift_op != NULL)
9729 {
9730 if (shift_by_reg != NULL)
9731 {
9732 if (speed_p)
9733 *cost += extra_cost->alu.arith_shift_reg;
9734 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9735 }
9736 else if (speed_p)
9737 *cost += extra_cost->alu.arith_shift;
9738
9739 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9740 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9741 return true;
9742 }
9743
9744 if (arm_arch_thumb2
9745 && GET_CODE (XEXP (x, 1)) == MULT)
9746 {
9747 /* MLS. */
9748 if (speed_p)
9749 *cost += extra_cost->mult[0].add;
9750 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9751 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9752 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9753 return true;
9754 }
9755
9756 if (CONST_INT_P (XEXP (x, 0)))
9757 {
9758 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9759 INTVAL (XEXP (x, 0)), NULL_RTX,
9760 NULL_RTX, 1, 0);
9761 *cost = COSTS_N_INSNS (insns);
9762 if (speed_p)
9763 *cost += insns * extra_cost->alu.arith;
9764 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9765 return true;
9766 }
9767 else if (speed_p)
9768 *cost += extra_cost->alu.arith;
9769
9770 return false;
9771 }
9772
9773 if (GET_MODE_CLASS (mode) == MODE_INT
9774 && GET_MODE_SIZE (mode) < 4)
9775 {
9776 rtx shift_op, shift_reg;
9777 shift_reg = NULL;
9778
9779 /* We check both sides of the MINUS for shifter operands since,
9780 unlike PLUS, it's not commutative. */
9781
9782 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9783 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9784
9785 /* Slightly disparage, as we might need to widen the result. */
9786 *cost += 1;
9787 if (speed_p)
9788 *cost += extra_cost->alu.arith;
9789
9790 if (CONST_INT_P (XEXP (x, 0)))
9791 {
9792 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9793 return true;
9794 }
9795
9796 return false;
9797 }
9798
9799 if (mode == DImode)
9800 {
9801 *cost += COSTS_N_INSNS (1);
9802
9803 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9804 {
9805 rtx op1 = XEXP (x, 1);
9806
9807 if (speed_p)
9808 *cost += 2 * extra_cost->alu.arith;
9809
9810 if (GET_CODE (op1) == ZERO_EXTEND)
9811 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9812 0, speed_p);
9813 else
9814 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9815 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9816 0, speed_p);
9817 return true;
9818 }
9819 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9820 {
9821 if (speed_p)
9822 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9823 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9824 0, speed_p)
9825 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9826 return true;
9827 }
9828 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9829 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9830 {
9831 if (speed_p)
9832 *cost += (extra_cost->alu.arith
9833 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9834 ? extra_cost->alu.arith
9835 : extra_cost->alu.arith_shift));
9836 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9837 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9838 GET_CODE (XEXP (x, 1)), 0, speed_p));
9839 return true;
9840 }
9841
9842 if (speed_p)
9843 *cost += 2 * extra_cost->alu.arith;
9844 return false;
9845 }
9846
9847 /* Vector mode? */
9848
9849 *cost = LIBCALL_COST (2);
9850 return false;
9851
9852 case PLUS:
9853 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9854 && (mode == SFmode || !TARGET_VFP_SINGLE))
9855 {
9856 if (GET_CODE (XEXP (x, 0)) == MULT)
9857 {
9858 rtx mul_op0, mul_op1, add_op;
9859
9860 if (speed_p)
9861 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9862
9863 mul_op0 = XEXP (XEXP (x, 0), 0);
9864 mul_op1 = XEXP (XEXP (x, 0), 1);
9865 add_op = XEXP (x, 1);
9866
9867 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9868 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9869 + rtx_cost (add_op, mode, code, 0, speed_p));
9870
9871 return true;
9872 }
9873
9874 if (speed_p)
9875 *cost += extra_cost->fp[mode != SFmode].addsub;
9876 return false;
9877 }
9878 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9879 {
9880 *cost = LIBCALL_COST (2);
9881 return false;
9882 }
9883
9884 /* Narrow modes can be synthesized in SImode, but the range
9885 of useful sub-operations is limited. Check for shift operations
9886 on one of the operands. Only left shifts can be used in the
9887 narrow modes. */
9888 if (GET_MODE_CLASS (mode) == MODE_INT
9889 && GET_MODE_SIZE (mode) < 4)
9890 {
9891 rtx shift_op, shift_reg;
9892 shift_reg = NULL;
9893
9894 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9895
9896 if (CONST_INT_P (XEXP (x, 1)))
9897 {
9898 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9899 INTVAL (XEXP (x, 1)), NULL_RTX,
9900 NULL_RTX, 1, 0);
9901 *cost = COSTS_N_INSNS (insns);
9902 if (speed_p)
9903 *cost += insns * extra_cost->alu.arith;
9904 /* Slightly penalize a narrow operation as the result may
9905 need widening. */
9906 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9907 return true;
9908 }
9909
9910 /* Slightly penalize a narrow operation as the result may
9911 need widening. */
9912 *cost += 1;
9913 if (speed_p)
9914 *cost += extra_cost->alu.arith;
9915
9916 return false;
9917 }
9918
9919 if (mode == SImode)
9920 {
9921 rtx shift_op, shift_reg;
9922
9923 if (TARGET_INT_SIMD
9924 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9925 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9926 {
9927 /* UXTA[BH] or SXTA[BH]. */
9928 if (speed_p)
9929 *cost += extra_cost->alu.extend_arith;
9930 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9931 0, speed_p)
9932 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9933 return true;
9934 }
9935
9936 shift_reg = NULL;
9937 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9938 if (shift_op != NULL)
9939 {
9940 if (shift_reg)
9941 {
9942 if (speed_p)
9943 *cost += extra_cost->alu.arith_shift_reg;
9944 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9945 }
9946 else if (speed_p)
9947 *cost += extra_cost->alu.arith_shift;
9948
9949 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9950 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9951 return true;
9952 }
9953 if (GET_CODE (XEXP (x, 0)) == MULT)
9954 {
9955 rtx mul_op = XEXP (x, 0);
9956
9957 if (TARGET_DSP_MULTIPLY
9958 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9959 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9960 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9961 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9962 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9963 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9965 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9966 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9967 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9968 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9969 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9970 == 16))))))
9971 {
9972 /* SMLA[BT][BT]. */
9973 if (speed_p)
9974 *cost += extra_cost->mult[0].extend_add;
9975 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9978 SIGN_EXTEND, 0, speed_p)
9979 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9980 return true;
9981 }
9982
9983 if (speed_p)
9984 *cost += extra_cost->mult[0].add;
9985 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9986 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9987 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9988 return true;
9989 }
9990 if (CONST_INT_P (XEXP (x, 1)))
9991 {
9992 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9993 INTVAL (XEXP (x, 1)), NULL_RTX,
9994 NULL_RTX, 1, 0);
9995 *cost = COSTS_N_INSNS (insns);
9996 if (speed_p)
9997 *cost += insns * extra_cost->alu.arith;
9998 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9999 return true;
10000 }
10001 else if (speed_p)
10002 *cost += extra_cost->alu.arith;
10003
10004 return false;
10005 }
10006
10007 if (mode == DImode)
10008 {
10009 if (arm_arch3m
10010 && GET_CODE (XEXP (x, 0)) == MULT
10011 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10012 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10013 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10014 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10015 {
10016 if (speed_p)
10017 *cost += extra_cost->mult[1].extend_add;
10018 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10019 ZERO_EXTEND, 0, speed_p)
10020 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10021 ZERO_EXTEND, 0, speed_p)
10022 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10023 return true;
10024 }
10025
10026 *cost += COSTS_N_INSNS (1);
10027
10028 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10029 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10030 {
10031 if (speed_p)
10032 *cost += (extra_cost->alu.arith
10033 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10034 ? extra_cost->alu.arith
10035 : extra_cost->alu.arith_shift));
10036
10037 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10038 0, speed_p)
10039 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10040 return true;
10041 }
10042
10043 if (speed_p)
10044 *cost += 2 * extra_cost->alu.arith;
10045 return false;
10046 }
10047
10048 /* Vector mode? */
10049 *cost = LIBCALL_COST (2);
10050 return false;
10051 case IOR:
10052 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10053 {
10054 if (speed_p)
10055 *cost += extra_cost->alu.rev;
10056
10057 return true;
10058 }
10059 /* Fall through. */
10060 case AND: case XOR:
10061 if (mode == SImode)
10062 {
10063 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10064 rtx op0 = XEXP (x, 0);
10065 rtx shift_op, shift_reg;
10066
10067 if (subcode == NOT
10068 && (code == AND
10069 || (code == IOR && TARGET_THUMB2)))
10070 op0 = XEXP (op0, 0);
10071
10072 shift_reg = NULL;
10073 shift_op = shifter_op_p (op0, &shift_reg);
10074 if (shift_op != NULL)
10075 {
10076 if (shift_reg)
10077 {
10078 if (speed_p)
10079 *cost += extra_cost->alu.log_shift_reg;
10080 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10081 }
10082 else if (speed_p)
10083 *cost += extra_cost->alu.log_shift;
10084
10085 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10086 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10087 return true;
10088 }
10089
10090 if (CONST_INT_P (XEXP (x, 1)))
10091 {
10092 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10093 INTVAL (XEXP (x, 1)), NULL_RTX,
10094 NULL_RTX, 1, 0);
10095
10096 *cost = COSTS_N_INSNS (insns);
10097 if (speed_p)
10098 *cost += insns * extra_cost->alu.logical;
10099 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10100 return true;
10101 }
10102
10103 if (speed_p)
10104 *cost += extra_cost->alu.logical;
10105 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10106 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10107 return true;
10108 }
10109
10110 if (mode == DImode)
10111 {
10112 rtx op0 = XEXP (x, 0);
10113 enum rtx_code subcode = GET_CODE (op0);
10114
10115 *cost += COSTS_N_INSNS (1);
10116
10117 if (subcode == NOT
10118 && (code == AND
10119 || (code == IOR && TARGET_THUMB2)))
10120 op0 = XEXP (op0, 0);
10121
10122 if (GET_CODE (op0) == ZERO_EXTEND)
10123 {
10124 if (speed_p)
10125 *cost += 2 * extra_cost->alu.logical;
10126
10127 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10128 0, speed_p)
10129 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10130 return true;
10131 }
10132 else if (GET_CODE (op0) == SIGN_EXTEND)
10133 {
10134 if (speed_p)
10135 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10136
10137 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10138 0, speed_p)
10139 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10140 return true;
10141 }
10142
10143 if (speed_p)
10144 *cost += 2 * extra_cost->alu.logical;
10145
10146 return true;
10147 }
10148 /* Vector mode? */
10149
10150 *cost = LIBCALL_COST (2);
10151 return false;
10152
10153 case MULT:
10154 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10155 && (mode == SFmode || !TARGET_VFP_SINGLE))
10156 {
10157 rtx op0 = XEXP (x, 0);
10158
10159 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10160 op0 = XEXP (op0, 0);
10161
10162 if (speed_p)
10163 *cost += extra_cost->fp[mode != SFmode].mult;
10164
10165 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10166 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10167 return true;
10168 }
10169 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10170 {
10171 *cost = LIBCALL_COST (2);
10172 return false;
10173 }
10174
10175 if (mode == SImode)
10176 {
10177 if (TARGET_DSP_MULTIPLY
10178 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10179 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10180 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10182 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10183 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10185 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10186 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10187 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10188 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10189 && (INTVAL (XEXP (XEXP (x, 1), 1))
10190 == 16))))))
10191 {
10192 /* SMUL[TB][TB]. */
10193 if (speed_p)
10194 *cost += extra_cost->mult[0].extend;
10195 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10196 SIGN_EXTEND, 0, speed_p);
10197 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10198 SIGN_EXTEND, 1, speed_p);
10199 return true;
10200 }
10201 if (speed_p)
10202 *cost += extra_cost->mult[0].simple;
10203 return false;
10204 }
10205
10206 if (mode == DImode)
10207 {
10208 if (arm_arch3m
10209 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10210 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10211 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10212 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10213 {
10214 if (speed_p)
10215 *cost += extra_cost->mult[1].extend;
10216 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10217 ZERO_EXTEND, 0, speed_p)
10218 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10219 ZERO_EXTEND, 0, speed_p));
10220 return true;
10221 }
10222
10223 *cost = LIBCALL_COST (2);
10224 return false;
10225 }
10226
10227 /* Vector mode? */
10228 *cost = LIBCALL_COST (2);
10229 return false;
10230
10231 case NEG:
10232 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10233 && (mode == SFmode || !TARGET_VFP_SINGLE))
10234 {
10235 if (GET_CODE (XEXP (x, 0)) == MULT)
10236 {
10237 /* VNMUL. */
10238 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10239 return true;
10240 }
10241
10242 if (speed_p)
10243 *cost += extra_cost->fp[mode != SFmode].neg;
10244
10245 return false;
10246 }
10247 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10248 {
10249 *cost = LIBCALL_COST (1);
10250 return false;
10251 }
10252
10253 if (mode == SImode)
10254 {
10255 if (GET_CODE (XEXP (x, 0)) == ABS)
10256 {
10257 *cost += COSTS_N_INSNS (1);
10258 /* Assume the non-flag-changing variant. */
10259 if (speed_p)
10260 *cost += (extra_cost->alu.log_shift
10261 + extra_cost->alu.arith_shift);
10262 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10263 return true;
10264 }
10265
10266 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10267 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10268 {
10269 *cost += COSTS_N_INSNS (1);
10270 /* No extra cost for MOV imm and MVN imm. */
10271 /* If the comparison op is using the flags, there's no further
10272 cost; otherwise we need to add the cost of the comparison. */
10273 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10274 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10275 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10276 {
10277 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10278 *cost += (COSTS_N_INSNS (1)
10279 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10280 0, speed_p)
10281 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10282 1, speed_p));
10283 if (speed_p)
10284 *cost += extra_cost->alu.arith;
10285 }
10286 return true;
10287 }
10288
10289 if (speed_p)
10290 *cost += extra_cost->alu.arith;
10291 return false;
10292 }
10293
10294 if (GET_MODE_CLASS (mode) == MODE_INT
10295 && GET_MODE_SIZE (mode) < 4)
10296 {
10297 /* Slightly disparage, as we might need an extend operation. */
10298 *cost += 1;
10299 if (speed_p)
10300 *cost += extra_cost->alu.arith;
10301 return false;
10302 }
10303
10304 if (mode == DImode)
10305 {
10306 *cost += COSTS_N_INSNS (1);
10307 if (speed_p)
10308 *cost += 2 * extra_cost->alu.arith;
10309 return false;
10310 }
10311
10312 /* Vector mode? */
10313 *cost = LIBCALL_COST (1);
10314 return false;
10315
10316 case NOT:
10317 if (mode == SImode)
10318 {
10319 rtx shift_op;
10320 rtx shift_reg = NULL;
10321
10322 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10323
10324 if (shift_op)
10325 {
10326 if (shift_reg != NULL)
10327 {
10328 if (speed_p)
10329 *cost += extra_cost->alu.log_shift_reg;
10330 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10331 }
10332 else if (speed_p)
10333 *cost += extra_cost->alu.log_shift;
10334 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10335 return true;
10336 }
10337
10338 if (speed_p)
10339 *cost += extra_cost->alu.logical;
10340 return false;
10341 }
10342 if (mode == DImode)
10343 {
10344 *cost += COSTS_N_INSNS (1);
10345 return false;
10346 }
10347
10348 /* Vector mode? */
10349
10350 *cost += LIBCALL_COST (1);
10351 return false;
10352
10353 case IF_THEN_ELSE:
10354 {
10355 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10356 {
10357 *cost += COSTS_N_INSNS (3);
10358 return true;
10359 }
10360 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10361 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10362
10363 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10364 /* Assume that if one arm of the if_then_else is a register,
10365 that it will be tied with the result and eliminate the
10366 conditional insn. */
10367 if (REG_P (XEXP (x, 1)))
10368 *cost += op2cost;
10369 else if (REG_P (XEXP (x, 2)))
10370 *cost += op1cost;
10371 else
10372 {
10373 if (speed_p)
10374 {
10375 if (extra_cost->alu.non_exec_costs_exec)
10376 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10377 else
10378 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10379 }
10380 else
10381 *cost += op1cost + op2cost;
10382 }
10383 }
10384 return true;
10385
10386 case COMPARE:
10387 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10388 *cost = 0;
10389 else
10390 {
10391 machine_mode op0mode;
10392 /* We'll mostly assume that the cost of a compare is the cost of the
10393 LHS. However, there are some notable exceptions. */
10394
10395 /* Floating point compares are never done as side-effects. */
10396 op0mode = GET_MODE (XEXP (x, 0));
10397 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10398 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10399 {
10400 if (speed_p)
10401 *cost += extra_cost->fp[op0mode != SFmode].compare;
10402
10403 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10404 {
10405 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10406 return true;
10407 }
10408
10409 return false;
10410 }
10411 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10412 {
10413 *cost = LIBCALL_COST (2);
10414 return false;
10415 }
10416
10417 /* DImode compares normally take two insns. */
10418 if (op0mode == DImode)
10419 {
10420 *cost += COSTS_N_INSNS (1);
10421 if (speed_p)
10422 *cost += 2 * extra_cost->alu.arith;
10423 return false;
10424 }
10425
10426 if (op0mode == SImode)
10427 {
10428 rtx shift_op;
10429 rtx shift_reg;
10430
10431 if (XEXP (x, 1) == const0_rtx
10432 && !(REG_P (XEXP (x, 0))
10433 || (GET_CODE (XEXP (x, 0)) == SUBREG
10434 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10435 {
10436 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10437
10438 /* Multiply operations that set the flags are often
10439 significantly more expensive. */
10440 if (speed_p
10441 && GET_CODE (XEXP (x, 0)) == MULT
10442 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10443 *cost += extra_cost->mult[0].flag_setting;
10444
10445 if (speed_p
10446 && GET_CODE (XEXP (x, 0)) == PLUS
10447 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10448 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10449 0), 1), mode))
10450 *cost += extra_cost->mult[0].flag_setting;
10451 return true;
10452 }
10453
10454 shift_reg = NULL;
10455 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10456 if (shift_op != NULL)
10457 {
10458 if (shift_reg != NULL)
10459 {
10460 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10461 1, speed_p);
10462 if (speed_p)
10463 *cost += extra_cost->alu.arith_shift_reg;
10464 }
10465 else if (speed_p)
10466 *cost += extra_cost->alu.arith_shift;
10467 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10468 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10469 return true;
10470 }
10471
10472 if (speed_p)
10473 *cost += extra_cost->alu.arith;
10474 if (CONST_INT_P (XEXP (x, 1))
10475 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10476 {
10477 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10478 return true;
10479 }
10480 return false;
10481 }
10482
10483 /* Vector mode? */
10484
10485 *cost = LIBCALL_COST (2);
10486 return false;
10487 }
10488 return true;
10489
10490 case EQ:
10491 case NE:
10492 case LT:
10493 case LE:
10494 case GT:
10495 case GE:
10496 case LTU:
10497 case LEU:
10498 case GEU:
10499 case GTU:
10500 case ORDERED:
10501 case UNORDERED:
10502 case UNEQ:
10503 case UNLE:
10504 case UNLT:
10505 case UNGE:
10506 case UNGT:
10507 case LTGT:
10508 if (outer_code == SET)
10509 {
10510 /* Is it a store-flag operation? */
10511 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10512 && XEXP (x, 1) == const0_rtx)
10513 {
10514 /* Thumb also needs an IT insn. */
10515 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10516 return true;
10517 }
10518 if (XEXP (x, 1) == const0_rtx)
10519 {
10520 switch (code)
10521 {
10522 case LT:
10523 /* LSR Rd, Rn, #31. */
10524 if (speed_p)
10525 *cost += extra_cost->alu.shift;
10526 break;
10527
10528 case EQ:
10529 /* RSBS T1, Rn, #0
10530 ADC Rd, Rn, T1. */
10531
10532 case NE:
10533 /* SUBS T1, Rn, #1
10534 SBC Rd, Rn, T1. */
10535 *cost += COSTS_N_INSNS (1);
10536 break;
10537
10538 case LE:
10539 /* RSBS T1, Rn, Rn, LSR #31
10540 ADC Rd, Rn, T1. */
10541 *cost += COSTS_N_INSNS (1);
10542 if (speed_p)
10543 *cost += extra_cost->alu.arith_shift;
10544 break;
10545
10546 case GT:
10547 /* RSB Rd, Rn, Rn, ASR #1
10548 LSR Rd, Rd, #31. */
10549 *cost += COSTS_N_INSNS (1);
10550 if (speed_p)
10551 *cost += (extra_cost->alu.arith_shift
10552 + extra_cost->alu.shift);
10553 break;
10554
10555 case GE:
10556 /* ASR Rd, Rn, #31
10557 ADD Rd, Rn, #1. */
10558 *cost += COSTS_N_INSNS (1);
10559 if (speed_p)
10560 *cost += extra_cost->alu.shift;
10561 break;
10562
10563 default:
10564 /* Remaining cases are either meaningless or would take
10565 three insns anyway. */
10566 *cost = COSTS_N_INSNS (3);
10567 break;
10568 }
10569 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10570 return true;
10571 }
10572 else
10573 {
10574 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10575 if (CONST_INT_P (XEXP (x, 1))
10576 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10577 {
10578 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10579 return true;
10580 }
10581
10582 return false;
10583 }
10584 }
10585 /* Not directly inside a set.  If it involves the condition code
10586 register it must be the condition for a branch, cond_exec or
10587 if_then_else operation.  Since the comparison is performed elsewhere,
10588 this is just the control part, which has no additional
10589 cost. */
10590 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10591 && XEXP (x, 1) == const0_rtx)
10592 {
10593 *cost = 0;
10594 return true;
10595 }
10596 return false;
10597
10598 case ABS:
10599 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10600 && (mode == SFmode || !TARGET_VFP_SINGLE))
10601 {
10602 if (speed_p)
10603 *cost += extra_cost->fp[mode != SFmode].neg;
10604
10605 return false;
10606 }
10607 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10608 {
10609 *cost = LIBCALL_COST (1);
10610 return false;
10611 }
10612
10613 if (mode == SImode)
10614 {
10615 if (speed_p)
10616 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10617 return false;
10618 }
10619 /* Vector mode? */
10620 *cost = LIBCALL_COST (1);
10621 return false;
10622
10623 case SIGN_EXTEND:
10624 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10625 && MEM_P (XEXP (x, 0)))
10626 {
10627 if (mode == DImode)
10628 *cost += COSTS_N_INSNS (1);
10629
10630 if (!speed_p)
10631 return true;
10632
10633 if (GET_MODE (XEXP (x, 0)) == SImode)
10634 *cost += extra_cost->ldst.load;
10635 else
10636 *cost += extra_cost->ldst.load_sign_extend;
10637
10638 if (mode == DImode)
10639 *cost += extra_cost->alu.shift;
10640
10641 return true;
10642 }
10643
10644 /* Widening from less than 32-bits requires an extend operation. */
10645 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10646 {
10647 /* We have SXTB/SXTH. */
10648 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10649 if (speed_p)
10650 *cost += extra_cost->alu.extend;
10651 }
10652 else if (GET_MODE (XEXP (x, 0)) != SImode)
10653 {
10654 /* Needs two shifts. */
10655 *cost += COSTS_N_INSNS (1);
10656 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10657 if (speed_p)
10658 *cost += 2 * extra_cost->alu.shift;
10659 }
10660
10661 /* Widening beyond 32 bits requires one more insn. */
10662 if (mode == DImode)
10663 {
10664 *cost += COSTS_N_INSNS (1);
10665 if (speed_p)
10666 *cost += extra_cost->alu.shift;
10667 }
10668
10669 return true;
10670
10671 case ZERO_EXTEND:
10672 if ((arm_arch4
10673 || GET_MODE (XEXP (x, 0)) == SImode
10674 || GET_MODE (XEXP (x, 0)) == QImode)
10675 && MEM_P (XEXP (x, 0)))
10676 {
10677 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10678
10679 if (mode == DImode)
10680 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10681
10682 return true;
10683 }
10684
10685 /* Widening from less than 32 bits requires an extend operation. */
10686 if (GET_MODE (XEXP (x, 0)) == QImode)
10687 {
10688 /* UXTB can be a shorter instruction in Thumb2, but it might
10689 be slower than the AND Rd, Rn, #255 alternative. When
10690 optimizing for speed it should never be slower to use
10691 AND, and we don't really model 16-bit vs 32-bit insns
10692 here. */
10693 if (speed_p)
10694 *cost += extra_cost->alu.logical;
10695 }
10696 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10697 {
10698 /* We have UXTB/UXTH. */
10699 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10702 }
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10704 {
10705 /* Needs two shifts. It's marginally preferable to use
10706 shifts rather than two BIC instructions as the second
10707 shift may merge with a subsequent insn as a shifter
10708 op. */
10709 *cost = COSTS_N_INSNS (2);
10710 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10711 if (speed_p)
10712 *cost += 2 * extra_cost->alu.shift;
10713 }
10714
10715 /* Widening beyond 32 bits requires one more insn. */
10716 if (mode == DImode)
10717 {
10718 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10719 }
10720
10721 return true;
10722
10723 case CONST_INT:
10724 *cost = 0;
10725 /* CONST_INT has no mode, so we cannot tell for sure how many
10726 insns are really going to be needed. The best we can do is
10727 look at the value passed. If it fits in SImode, then assume
10728 that's the mode it will be used for. Otherwise assume it
10729 will be used in DImode. */
10730 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10731 mode = SImode;
10732 else
10733 mode = DImode;
10734
10735 /* Avoid blowing up in arm_gen_constant (). */
10736 if (!(outer_code == PLUS
10737 || outer_code == AND
10738 || outer_code == IOR
10739 || outer_code == XOR
10740 || outer_code == MINUS))
10741 outer_code = SET;
10742
10743 const_int_cost:
10744 if (mode == SImode)
10745 {
10746 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10747 INTVAL (x), NULL, NULL,
10748 0, 0));
10749 /* Extra costs? */
10750 }
10751 else
10752 {
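          /* A DImode (or wider) constant: cost the low and high 32-bit
             halves as two separate SImode constants.  */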
10753 *cost += COSTS_N_INSNS (arm_gen_constant
10754 (outer_code, SImode, NULL,
10755 trunc_int_for_mode (INTVAL (x), SImode),
10756 NULL, NULL, 0, 0)
10757 + arm_gen_constant (outer_code, SImode, NULL,
10758 INTVAL (x) >> 32, NULL,
10759 NULL, 0, 0));
10760 /* Extra costs? */
10761 }
10762
10763 return true;
10764
10765 case CONST:
10766 case LABEL_REF:
10767 case SYMBOL_REF:
10768 if (speed_p)
10769 {
10770 if (arm_arch_thumb2 && !flag_pic)
10771 *cost += COSTS_N_INSNS (1);
10772 else
10773 *cost += extra_cost->ldst.load;
10774 }
10775 else
10776 *cost += COSTS_N_INSNS (1);
10777
10778 if (flag_pic)
10779 {
10780 *cost += COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->alu.arith;
10783 }
10784
10785 return true;
10786
10787 case CONST_FIXED:
10788 *cost = COSTS_N_INSNS (4);
10789 /* Fixme. */
10790 return true;
10791
10792 case CONST_DOUBLE:
10793 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10794 && (mode == SFmode || !TARGET_VFP_SINGLE))
10795 {
10796 if (vfp3_const_double_rtx (x))
10797 {
10798 if (speed_p)
10799 *cost += extra_cost->fp[mode == DFmode].fpconst;
10800 return true;
10801 }
10802
10803 if (speed_p)
10804 {
10805 if (mode == DFmode)
10806 *cost += extra_cost->ldst.loadd;
10807 else
10808 *cost += extra_cost->ldst.loadf;
10809 }
10810 else
10811 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10812
10813 return true;
10814 }
10815 *cost = COSTS_N_INSNS (4);
10816 return true;
10817
10818 case CONST_VECTOR:
10819 /* Fixme. */
10820 if (TARGET_NEON
10821 && TARGET_HARD_FLOAT
10822 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10823 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10824 *cost = COSTS_N_INSNS (1);
10825 else
10826 *cost = COSTS_N_INSNS (4);
10827 return true;
10828
10829 case HIGH:
10830 case LO_SUM:
10831 /* When optimizing for size, we prefer constant pool entries to
10832 MOVW/MOVT pairs, so bump the cost of these slightly. */
10833 if (!speed_p)
10834 *cost += 1;
10835 return true;
10836
10837 case CLZ:
10838 if (speed_p)
10839 *cost += extra_cost->alu.clz;
10840 return false;
10841
10842 case SMIN:
10843 if (XEXP (x, 1) == const0_rtx)
10844 {
10845 if (speed_p)
10846 *cost += extra_cost->alu.log_shift;
10847 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10848 return true;
10849 }
10850 /* Fall through. */
10851 case SMAX:
10852 case UMIN:
10853 case UMAX:
10854 *cost += COSTS_N_INSNS (1);
10855 return false;
10856
10857 case TRUNCATE:
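      /* The condition below matches the high half of a widening 32x32->64
         multiply: a wider MULT of two SIGN_EXTENDed (or two ZERO_EXTENDed)
         operands, arithmetic-shifted right by 32 and truncated back, which
         is what SMULL/UMULL produce.  Cost it as a single extending
         multiply plus the cost of the two multiply operands.  */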
10858 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10859 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10860 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10861 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10862 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10863 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10864 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10865 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10866 == ZERO_EXTEND))))
10867 {
10868 if (speed_p)
10869 *cost += extra_cost->mult[1].extend;
10870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10871 ZERO_EXTEND, 0, speed_p)
10872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10873 ZERO_EXTEND, 0, speed_p));
10874 return true;
10875 }
10876 *cost = LIBCALL_COST (1);
10877 return false;
10878
10879 case UNSPEC_VOLATILE:
10880 case UNSPEC:
10881 return arm_unspec_cost (x, outer_code, speed_p, cost);
10882
10883 case PC:
10884 /* Reading the PC is like reading any other register. Writing it
10885 is more expensive, but we take that into account elsewhere. */
10886 *cost = 0;
10887 return true;
10888
10889 case ZERO_EXTRACT:
10890 /* TODO: Simple zero_extract of bottom bits using AND. */
10891 /* Fall through. */
10892 case SIGN_EXTRACT:
10893 if (arm_arch6
10894 && mode == SImode
10895 && CONST_INT_P (XEXP (x, 1))
10896 && CONST_INT_P (XEXP (x, 2)))
10897 {
10898 if (speed_p)
10899 *cost += extra_cost->alu.bfx;
10900 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10901 return true;
10902 }
10903 /* Without UBFX/SBFX, need to resort to shift operations. */
10904 *cost += COSTS_N_INSNS (1);
10905 if (speed_p)
10906 *cost += 2 * extra_cost->alu.shift;
10907 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10908 return true;
10909
10910 case FLOAT_EXTEND:
10911 if (TARGET_HARD_FLOAT)
10912 {
10913 if (speed_p)
10914 *cost += extra_cost->fp[mode == DFmode].widen;
10915 if (!TARGET_VFP5
10916 && GET_MODE (XEXP (x, 0)) == HFmode)
10917 {
10918 /* Pre v8, widening HF->DF is a two-step process, first
10919 widening to SFmode. */
10920 *cost += COSTS_N_INSNS (1);
10921 if (speed_p)
10922 *cost += extra_cost->fp[0].widen;
10923 }
10924 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10925 return true;
10926 }
10927
10928 *cost = LIBCALL_COST (1);
10929 return false;
10930
10931 case FLOAT_TRUNCATE:
10932 if (TARGET_HARD_FLOAT)
10933 {
10934 if (speed_p)
10935 *cost += extra_cost->fp[mode == DFmode].narrow;
10936 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10937 return true;
10938 /* Vector modes? */
10939 }
10940 *cost = LIBCALL_COST (1);
10941 return false;
10942
10943 case FMA:
10944 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10945 {
10946 rtx op0 = XEXP (x, 0);
10947 rtx op1 = XEXP (x, 1);
10948 rtx op2 = XEXP (x, 2);
10949
10950
10951 /* vfms or vfnma. */
10952 if (GET_CODE (op0) == NEG)
10953 op0 = XEXP (op0, 0);
10954
10955 /* vfnms or vfnma. */
10956 if (GET_CODE (op2) == NEG)
10957 op2 = XEXP (op2, 0);
10958
10959 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10960 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10961 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10962
10963 if (speed_p)
10964 *cost += extra_cost->fp[mode == DFmode].fma;
10965
10966 return true;
10967 }
10968
10969 *cost = LIBCALL_COST (3);
10970 return false;
10971
10972 case FIX:
10973 case UNSIGNED_FIX:
10974 if (TARGET_HARD_FLOAT)
10975 {
10976 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10977 a vcvt fixed-point conversion. */
10978 if (code == FIX && mode == SImode
10979 && GET_CODE (XEXP (x, 0)) == FIX
10980 && GET_MODE (XEXP (x, 0)) == SFmode
10981 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10982 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10983 > 0)
10984 {
10985 if (speed_p)
10986 *cost += extra_cost->fp[0].toint;
10987
10988 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10989 code, 0, speed_p);
10990 return true;
10991 }
10992
10993 if (GET_MODE_CLASS (mode) == MODE_INT)
10994 {
10995 mode = GET_MODE (XEXP (x, 0));
10996 if (speed_p)
10997 *cost += extra_cost->fp[mode == DFmode].toint;
10998 /* Strip off the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x, 0)) == FIX)
11000 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11001 0, speed_p);
11002 else
11003 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11004 /* ??? Increase the cost to deal with transferring from
11005 FP -> CORE registers? */
11006 return true;
11007 }
11008 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11009 && TARGET_VFP5)
11010 {
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].roundint;
11013 return false;
11014 }
11015 /* Vector costs? */
11016 }
11017 *cost = LIBCALL_COST (1);
11018 return false;
11019
11020 case FLOAT:
11021 case UNSIGNED_FLOAT:
11022 if (TARGET_HARD_FLOAT)
11023 {
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11026 if (speed_p)
11027 *cost += extra_cost->fp[mode == DFmode].fromint;
11028 return false;
11029 }
11030 *cost = LIBCALL_COST (1);
11031 return false;
11032
11033 case CALL:
11034 return true;
11035
11036 case ASM_OPERANDS:
11037 {
11038 /* Just a guess. Guess number of instructions in the asm
11039 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11040 though (see PR60663). */
11041 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11042 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11043
11044 *cost = COSTS_N_INSNS (asm_length + num_operands);
11045 return true;
11046 }
11047 default:
11048 if (mode != VOIDmode)
11049 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11050 else
11051 *cost = COSTS_N_INSNS (4); /* Who knows? */
11052 return false;
11053 }
11054 }
11055
11056 #undef HANDLE_NARROW_SHIFT_ARITH
11057
11058 /* RTX costs entry point. */
11059
11060 static bool
11061 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11062 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11063 {
11064 bool result;
11065 int code = GET_CODE (x);
11066 gcc_assert (current_tune->insn_extra_cost);
11067
11068 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11069 (enum rtx_code) outer_code,
11070 current_tune->insn_extra_cost,
11071 total, speed);
11072
11073 if (dump_file && arm_verbose_cost)
11074 {
11075 print_rtl_single (dump_file, x);
11076 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11077 *total, result ? "final" : "partial");
11078 }
11079 return result;
11080 }
11081
11082 /* All address computations that can be done are free, but rtx cost returns
11083 the same for practically all of them. So we weight the different types
11084 of address here in the order (most pref first):
11085 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
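/* For illustration of the weights assigned below: a PRE/POST_INC/DEC address
   costs 0, a register plus constant (e.g. [r0, #8]) costs 2, a register plus
   a shifted or other arithmetic term (e.g. [r0, r1, lsl #2]) costs 3, a
   register plus register (e.g. [r0, r1]) costs 4, any other single term such
   as a plain register costs 6, and a LABEL_REF, SYMBOL_REF or (rare)
   memory-indirect address costs 10.  */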
11086 static inline int
11087 arm_arm_address_cost (rtx x)
11088 {
11089 enum rtx_code c = GET_CODE (x);
11090
11091 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11092 return 0;
11093 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11094 return 10;
11095
11096 if (c == PLUS)
11097 {
11098 if (CONST_INT_P (XEXP (x, 1)))
11099 return 2;
11100
11101 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11102 return 3;
11103
11104 return 4;
11105 }
11106
11107 return 6;
11108 }
11109
11110 static inline int
11111 arm_thumb_address_cost (rtx x)
11112 {
11113 enum rtx_code c = GET_CODE (x);
11114
11115 if (c == REG)
11116 return 1;
11117 if (c == PLUS
11118 && REG_P (XEXP (x, 0))
11119 && CONST_INT_P (XEXP (x, 1)))
11120 return 1;
11121
11122 return 2;
11123 }
11124
11125 static int
11126 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11127 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11128 {
11129 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11130 }
11131
11132 /* Adjust cost hook for XScale. */
11133 static bool
11134 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11135 int * cost)
11136 {
11137 /* Some true dependencies can have a higher cost depending
11138 on precisely how certain input operands are used. */
11139 if (dep_type == 0
11140 && recog_memoized (insn) >= 0
11141 && recog_memoized (dep) >= 0)
11142 {
11143 int shift_opnum = get_attr_shift (insn);
11144 enum attr_type attr_type = get_attr_type (dep);
11145
11146 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11147 operand for INSN. If we have a shifted input operand and the
11148 instruction we depend on is another ALU instruction, then we may
11149 have to account for an additional stall. */
11150 if (shift_opnum != 0
11151 && (attr_type == TYPE_ALU_SHIFT_IMM
11152 || attr_type == TYPE_ALUS_SHIFT_IMM
11153 || attr_type == TYPE_LOGIC_SHIFT_IMM
11154 || attr_type == TYPE_LOGICS_SHIFT_IMM
11155 || attr_type == TYPE_ALU_SHIFT_REG
11156 || attr_type == TYPE_ALUS_SHIFT_REG
11157 || attr_type == TYPE_LOGIC_SHIFT_REG
11158 || attr_type == TYPE_LOGICS_SHIFT_REG
11159 || attr_type == TYPE_MOV_SHIFT
11160 || attr_type == TYPE_MVN_SHIFT
11161 || attr_type == TYPE_MOV_SHIFT_REG
11162 || attr_type == TYPE_MVN_SHIFT_REG))
11163 {
11164 rtx shifted_operand;
11165 int opno;
11166
11167 /* Get the shifted operand. */
11168 extract_insn (insn);
11169 shifted_operand = recog_data.operand[shift_opnum];
11170
11171 /* Iterate over all the operands in DEP. If we write an operand
11172 that overlaps with SHIFTED_OPERAND, then we have to increase the
11173 cost of this dependency. */
11174 extract_insn (dep);
11175 preprocess_constraints (dep);
11176 for (opno = 0; opno < recog_data.n_operands; opno++)
11177 {
11178 /* We can ignore strict inputs. */
11179 if (recog_data.operand_type[opno] == OP_IN)
11180 continue;
11181
11182 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11183 shifted_operand))
11184 {
11185 *cost = 2;
11186 return false;
11187 }
11188 }
11189 }
11190 }
11191 return true;
11192 }
11193
11194 /* Adjust cost hook for Cortex A9. */
11195 static bool
11196 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11197 int * cost)
11198 {
11199 switch (dep_type)
11200 {
11201 case REG_DEP_ANTI:
11202 *cost = 0;
11203 return false;
11204
11205 case REG_DEP_TRUE:
11206 case REG_DEP_OUTPUT:
11207 if (recog_memoized (insn) >= 0
11208 && recog_memoized (dep) >= 0)
11209 {
11210 if (GET_CODE (PATTERN (insn)) == SET)
11211 {
11212 if (GET_MODE_CLASS
11213 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11214 || GET_MODE_CLASS
11215 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11216 {
11217 enum attr_type attr_type_insn = get_attr_type (insn);
11218 enum attr_type attr_type_dep = get_attr_type (dep);
11219
11220 /* By default all dependencies of the form
11221 s0 = s0 <op> s1
11222 s0 = s0 <op> s2
11223 have an extra latency of 1 cycle because
11224 of the input and output dependency in this
11225 case. However, this gets modeled as a true
11226 dependency and hence all these checks. */
11227 if (REG_P (SET_DEST (PATTERN (insn)))
11228 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11229 {
11230 /* FMACS is a special case where the dependent
11231 instruction can be issued 3 cycles before
11232 the normal latency in case of an output
11233 dependency. */
11234 if ((attr_type_insn == TYPE_FMACS
11235 || attr_type_insn == TYPE_FMACD)
11236 && (attr_type_dep == TYPE_FMACS
11237 || attr_type_dep == TYPE_FMACD))
11238 {
11239 if (dep_type == REG_DEP_OUTPUT)
11240 *cost = insn_default_latency (dep) - 3;
11241 else
11242 *cost = insn_default_latency (dep);
11243 return false;
11244 }
11245 else
11246 {
11247 if (dep_type == REG_DEP_OUTPUT)
11248 *cost = insn_default_latency (dep) + 1;
11249 else
11250 *cost = insn_default_latency (dep);
11251 }
11252 return false;
11253 }
11254 }
11255 }
11256 }
11257 break;
11258
11259 default:
11260 gcc_unreachable ();
11261 }
11262
11263 return true;
11264 }
11265
11266 /* Adjust cost hook for FA726TE. */
11267 static bool
11268 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11269 int * cost)
11270 {
11271 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11272 by a predicated one) has a penalty of 3. */
11273 if (dep_type == REG_DEP_TRUE
11274 && recog_memoized (insn) >= 0
11275 && recog_memoized (dep) >= 0
11276 && get_attr_conds (dep) == CONDS_SET)
11277 {
11278 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11279 if (get_attr_conds (insn) == CONDS_USE
11280 && get_attr_type (insn) != TYPE_BRANCH)
11281 {
11282 *cost = 3;
11283 return false;
11284 }
11285
11286 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11287 || get_attr_conds (insn) == CONDS_USE)
11288 {
11289 *cost = 0;
11290 return false;
11291 }
11292 }
11293
11294 return true;
11295 }
11296
11297 /* Implement TARGET_REGISTER_MOVE_COST.
11298
11299 A move between VFP_REGS and GENERAL_REGS is a single insn, but it is
11300 typically more expensive than a single memory access. We set
11301 the cost to less than two memory accesses so that floating
11302 point to integer conversion does not go through memory. */
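/* With the current numbers this works out as follows: a VFP<->core move
   below returns 15, while arm_memory_move_cost returns 10 for 32-bit
   targets, so one such move (15) stays cheaper than a store plus a load
   (20).  */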
11303
11304 int
11305 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11306 reg_class_t from, reg_class_t to)
11307 {
11308 if (TARGET_32BIT)
11309 {
11310 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11311 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11312 return 15;
11313 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11314 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11315 return 4;
11316 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11317 return 20;
11318 else
11319 return 2;
11320 }
11321 else
11322 {
11323 if (from == HI_REGS || to == HI_REGS)
11324 return 4;
11325 else
11326 return 2;
11327 }
11328 }
11329
11330 /* Implement TARGET_MEMORY_MOVE_COST. */
11331
11332 int
11333 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11334 bool in ATTRIBUTE_UNUSED)
11335 {
11336 if (TARGET_32BIT)
11337 return 10;
11338 else
11339 {
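      /* Thumb-1: scale the cost with the size of the access.  For example,
         an SImode value costs 8 when the LO_REGS class can be used and 16
         otherwise; sub-word accesses cost a flat 8.  */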
11340 if (GET_MODE_SIZE (mode) < 4)
11341 return 8;
11342 else
11343 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11344 }
11345 }
11346
11347 /* Vectorizer cost model implementation. */
11348
11349 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11350 static int
11351 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11352 tree vectype,
11353 int misalign ATTRIBUTE_UNUSED)
11354 {
11355 unsigned elements;
11356
11357 switch (type_of_cost)
11358 {
11359 case scalar_stmt:
11360 return current_tune->vec_costs->scalar_stmt_cost;
11361
11362 case scalar_load:
11363 return current_tune->vec_costs->scalar_load_cost;
11364
11365 case scalar_store:
11366 return current_tune->vec_costs->scalar_store_cost;
11367
11368 case vector_stmt:
11369 return current_tune->vec_costs->vec_stmt_cost;
11370
11371 case vector_load:
11372 return current_tune->vec_costs->vec_align_load_cost;
11373
11374 case vector_store:
11375 return current_tune->vec_costs->vec_store_cost;
11376
11377 case vec_to_scalar:
11378 return current_tune->vec_costs->vec_to_scalar_cost;
11379
11380 case scalar_to_vec:
11381 return current_tune->vec_costs->scalar_to_vec_cost;
11382
11383 case unaligned_load:
11384 case vector_gather_load:
11385 return current_tune->vec_costs->vec_unalign_load_cost;
11386
11387 case unaligned_store:
11388 case vector_scatter_store:
11389 return current_tune->vec_costs->vec_unalign_store_cost;
11390
11391 case cond_branch_taken:
11392 return current_tune->vec_costs->cond_taken_branch_cost;
11393
11394 case cond_branch_not_taken:
11395 return current_tune->vec_costs->cond_not_taken_branch_cost;
11396
11397 case vec_perm:
11398 case vec_promote_demote:
11399 return current_tune->vec_costs->vec_stmt_cost;
11400
11401 case vec_construct:
11402 elements = TYPE_VECTOR_SUBPARTS (vectype);
11403 return elements / 2 + 1;
11404
11405 default:
11406 gcc_unreachable ();
11407 }
11408 }
11409
11410 /* Implement targetm.vectorize.add_stmt_cost. */
11411
11412 static unsigned
11413 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11414 struct _stmt_vec_info *stmt_info, int misalign,
11415 enum vect_cost_model_location where)
11416 {
11417 unsigned *cost = (unsigned *) data;
11418 unsigned retval = 0;
11419
11420 if (flag_vect_cost_model)
11421 {
11422 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11423 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11424
11425 /* Statements in an inner loop relative to the loop being
11426 vectorized are weighted more heavily. The value here is
11427 arbitrary and could potentially be improved with analysis. */
11428 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11429 count *= 50; /* FIXME. */
11430
11431 retval = (unsigned) (count * stmt_cost);
11432 cost[where] += retval;
11433 }
11434
11435 return retval;
11436 }
11437
11438 /* Return true if and only if this insn can dual-issue only as older. */
11439 static bool
11440 cortexa7_older_only (rtx_insn *insn)
11441 {
11442 if (recog_memoized (insn) < 0)
11443 return false;
11444
11445 switch (get_attr_type (insn))
11446 {
11447 case TYPE_ALU_DSP_REG:
11448 case TYPE_ALU_SREG:
11449 case TYPE_ALUS_SREG:
11450 case TYPE_LOGIC_REG:
11451 case TYPE_LOGICS_REG:
11452 case TYPE_ADC_REG:
11453 case TYPE_ADCS_REG:
11454 case TYPE_ADR:
11455 case TYPE_BFM:
11456 case TYPE_REV:
11457 case TYPE_MVN_REG:
11458 case TYPE_SHIFT_IMM:
11459 case TYPE_SHIFT_REG:
11460 case TYPE_LOAD_BYTE:
11461 case TYPE_LOAD_4:
11462 case TYPE_STORE_4:
11463 case TYPE_FFARITHS:
11464 case TYPE_FADDS:
11465 case TYPE_FFARITHD:
11466 case TYPE_FADDD:
11467 case TYPE_FMOV:
11468 case TYPE_F_CVT:
11469 case TYPE_FCMPS:
11470 case TYPE_FCMPD:
11471 case TYPE_FCONSTS:
11472 case TYPE_FCONSTD:
11473 case TYPE_FMULS:
11474 case TYPE_FMACS:
11475 case TYPE_FMULD:
11476 case TYPE_FMACD:
11477 case TYPE_FDIVS:
11478 case TYPE_FDIVD:
11479 case TYPE_F_MRC:
11480 case TYPE_F_MRRC:
11481 case TYPE_F_FLAG:
11482 case TYPE_F_LOADS:
11483 case TYPE_F_STORES:
11484 return true;
11485 default:
11486 return false;
11487 }
11488 }
11489
11490 /* Return true if and only if this insn can dual-issue as younger. */
11491 static bool
11492 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11493 {
11494 if (recog_memoized (insn) < 0)
11495 {
11496 if (verbose > 5)
11497 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11498 return false;
11499 }
11500
11501 switch (get_attr_type (insn))
11502 {
11503 case TYPE_ALU_IMM:
11504 case TYPE_ALUS_IMM:
11505 case TYPE_LOGIC_IMM:
11506 case TYPE_LOGICS_IMM:
11507 case TYPE_EXTEND:
11508 case TYPE_MVN_IMM:
11509 case TYPE_MOV_IMM:
11510 case TYPE_MOV_REG:
11511 case TYPE_MOV_SHIFT:
11512 case TYPE_MOV_SHIFT_REG:
11513 case TYPE_BRANCH:
11514 case TYPE_CALL:
11515 return true;
11516 default:
11517 return false;
11518 }
11519 }
11520
11521
11522 /* Look for an instruction that can dual issue only as an older
11523 instruction, and move it in front of any instructions that can
11524 dual-issue as younger, while preserving the relative order of all
11525 other instructions in the ready list. This is a heuristic to help
11526 dual-issue in later cycles, by postponing issue of more flexible
11527 instructions. This heuristic may affect dual issue opportunities
11528 in the current cycle. */
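/* For example, if the ready list in issue order is { Y, A, O }, where Y can
   dual-issue as younger, A is neither, and O can dual-issue only as older,
   the code below rotates the list to { O, Y, A } so that O issues first.  */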
11529 static void
11530 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11531 int *n_readyp, int clock)
11532 {
11533 int i;
11534 int first_older_only = -1, first_younger = -1;
11535
11536 if (verbose > 5)
11537 fprintf (file,
11538 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11539 clock,
11540 *n_readyp);
11541
11542 /* Traverse the ready list from the head (the instruction to issue
11543 first), looking for the first instruction that can issue as
11544 younger and the first instruction that can dual-issue only as
11545 older. */
11546 for (i = *n_readyp - 1; i >= 0; i--)
11547 {
11548 rtx_insn *insn = ready[i];
11549 if (cortexa7_older_only (insn))
11550 {
11551 first_older_only = i;
11552 if (verbose > 5)
11553 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11554 break;
11555 }
11556 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11557 first_younger = i;
11558 }
11559
11560 /* Nothing to reorder because either no younger insn was found, or an
11561 insn that can dual-issue only as older appears before any insn that
11562 can dual-issue as younger. */
11563 if (first_younger == -1)
11564 {
11565 if (verbose > 5)
11566 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11567 return;
11568 }
11569
11570 /* Nothing to reorder because no older-only insn in the ready list. */
11571 if (first_older_only == -1)
11572 {
11573 if (verbose > 5)
11574 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11575 return;
11576 }
11577
11578 /* Move first_older_only insn before first_younger. */
11579 if (verbose > 5)
11580 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11581 INSN_UID (ready[first_older_only]),
11582 INSN_UID (ready[first_younger]));
11583 rtx_insn *first_older_only_insn = ready[first_older_only];
11584 for (i = first_older_only; i < first_younger; i++)
11585 {
11586 ready[i] = ready[i+1];
11587 }
11588
11589 ready[i] = first_older_only_insn;
11590 return;
11591 }
11592
11593 /* Implement TARGET_SCHED_REORDER. */
11594 static int
11595 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11596 int clock)
11597 {
11598 switch (arm_tune)
11599 {
11600 case TARGET_CPU_cortexa7:
11601 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11602 break;
11603 default:
11604 /* Do nothing for other cores. */
11605 break;
11606 }
11607
11608 return arm_issue_rate ();
11609 }
11610
11611 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11612 It corrects the value of COST based on the relationship between
11613 INSN and DEP and the dependency type DEP_TYPE. It returns the new
11614 value. There is a per-core adjust_cost hook to adjust scheduler costs
11615 and the per-core hook can choose to completely override the generic
11616 adjust_cost function. Only put bits of code into arm_adjust_cost that
11617 are common across all cores. */
11618 static int
11619 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11620 unsigned int)
11621 {
11622 rtx i_pat, d_pat;
11623
11624 /* When generating Thumb-1 code, we want to place flag-setting operations
11625 close to a conditional branch which depends on them, so that we can
11626 omit the comparison. */
11627 if (TARGET_THUMB1
11628 && dep_type == 0
11629 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11630 && recog_memoized (dep) >= 0
11631 && get_attr_conds (dep) == CONDS_SET)
11632 return 0;
11633
11634 if (current_tune->sched_adjust_cost != NULL)
11635 {
11636 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11637 return cost;
11638 }
11639
11640 /* XXX Is this strictly true? */
11641 if (dep_type == REG_DEP_ANTI
11642 || dep_type == REG_DEP_OUTPUT)
11643 return 0;
11644
11645 /* Call insns don't incur a stall, even if they follow a load. */
11646 if (dep_type == 0
11647 && CALL_P (insn))
11648 return 1;
11649
11650 if ((i_pat = single_set (insn)) != NULL
11651 && MEM_P (SET_SRC (i_pat))
11652 && (d_pat = single_set (dep)) != NULL
11653 && MEM_P (SET_DEST (d_pat)))
11654 {
11655 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11656 /* This is a load after a store; there is no conflict if the load reads
11657 from a cached area. Assume that loads from the stack and from the
11658 constant pool are cached, and that others will miss. This is a
11659 hack. */
11660
11661 if ((GET_CODE (src_mem) == SYMBOL_REF
11662 && CONSTANT_POOL_ADDRESS_P (src_mem))
11663 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11664 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11665 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11666 return 1;
11667 }
11668
11669 return cost;
11670 }
11671
11672 int
11673 arm_max_conditional_execute (void)
11674 {
11675 return max_insns_skipped;
11676 }
11677
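/* Default branch-cost hook: branches cost 4 on 32-bit targets, except 1
   when optimizing Thumb-2 for size; on Thumb-1 they cost 2 when optimizing
   and 0 otherwise.  */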
11678 static int
11679 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11680 {
11681 if (TARGET_32BIT)
11682 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11683 else
11684 return (optimize > 0) ? 2 : 0;
11685 }
11686
11687 static int
11688 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11689 {
11690 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11691 }
11692
11693 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11694 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11695 sequences of non-executed instructions in IT blocks probably take the same
11696 amount of time as executed instructions (and the IT instruction itself takes
11697 space in icache). This function was experimentally determined to give good
11698 results on a popular embedded benchmark. */
11699
11700 static int
11701 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11702 {
11703 return (TARGET_32BIT && speed_p) ? 1
11704 : arm_default_branch_cost (speed_p, predictable_p);
11705 }
11706
11707 static int
11708 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11709 {
11710 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11711 }
11712
11713 static bool fp_consts_inited = false;
11714
11715 static REAL_VALUE_TYPE value_fp0;
11716
11717 static void
11718 init_fp_table (void)
11719 {
11720 REAL_VALUE_TYPE r;
11721
11722 r = REAL_VALUE_ATOF ("0", DFmode);
11723 value_fp0 = r;
11724 fp_consts_inited = true;
11725 }
11726
11727 /* Return TRUE if rtx X is a valid immediate FP constant. */
11728 int
11729 arm_const_double_rtx (rtx x)
11730 {
11731 const REAL_VALUE_TYPE *r;
11732
11733 if (!fp_consts_inited)
11734 init_fp_table ();
11735
11736 r = CONST_DOUBLE_REAL_VALUE (x);
11737 if (REAL_VALUE_MINUS_ZERO (*r))
11738 return 0;
11739
11740 if (real_equal (r, &value_fp0))
11741 return 1;
11742
11743 return 0;
11744 }
11745
11746 /* VFPv3 has a fairly wide range of representable immediates, formed from
11747 "quarter-precision" floating-point values. These can be evaluated using this
11748 formula (with ^ for exponentiation):
11749
11750 -1^s * n * 2^-r
11751
11752 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11753 16 <= n <= 31 and 0 <= r <= 7.
11754
11755 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11756
11757 - A (most-significant) is the sign bit.
11758 - BCD are the exponent (encoded as r XOR 3).
11759 - EFGH are the mantissa (encoded as n - 16).
11760 */
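/* For example, 1.0 is representable as n = 16, r = 4 (16 * 2^-4), and is
   encoded as ABCDEFGH = 0 111 0000, i.e. index 0x70: the sign bit A is 0,
   BCD = 4 XOR 3 = 7, and EFGH = 16 - 16 = 0.  */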
11761
11762 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11763 fconst[sd] instruction, or -1 if X isn't suitable. */
11764 static int
11765 vfp3_const_double_index (rtx x)
11766 {
11767 REAL_VALUE_TYPE r, m;
11768 int sign, exponent;
11769 unsigned HOST_WIDE_INT mantissa, mant_hi;
11770 unsigned HOST_WIDE_INT mask;
11771 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11772 bool fail;
11773
11774 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11775 return -1;
11776
11777 r = *CONST_DOUBLE_REAL_VALUE (x);
11778
11779 /* We can't represent these things, so detect them first. */
11780 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11781 return -1;
11782
11783 /* Extract sign, exponent and mantissa. */
11784 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11785 r = real_value_abs (&r);
11786 exponent = REAL_EXP (&r);
11787 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11788 highest (sign) bit, with a fixed binary point at bit point_pos.
11789 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11790 bits for the mantissa, this may fail (low bits would be lost). */
11791 real_ldexp (&m, &r, point_pos - exponent);
11792 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11793 mantissa = w.elt (0);
11794 mant_hi = w.elt (1);
11795
11796 /* If there are bits set in the low part of the mantissa, we can't
11797 represent this value. */
11798 if (mantissa != 0)
11799 return -1;
11800
11801 /* Now make it so that mantissa contains the most-significant bits, and move
11802 the point_pos to indicate that the least-significant bits have been
11803 discarded. */
11804 point_pos -= HOST_BITS_PER_WIDE_INT;
11805 mantissa = mant_hi;
11806
11807 /* We can permit four significant bits of mantissa only, plus a high bit
11808 which is always 1. */
11809 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11810 if ((mantissa & mask) != 0)
11811 return -1;
11812
11813 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11814 mantissa >>= point_pos - 5;
11815
11816 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11817 floating-point immediate zero with Neon using an integer-zero load, but
11818 that case is handled elsewhere.) */
11819 if (mantissa == 0)
11820 return -1;
11821
11822 gcc_assert (mantissa >= 16 && mantissa <= 31);
11823
11824 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11825 normalized significands are in the range [1, 2). (Our mantissa is shifted
11826 left 4 places at this point relative to normalized IEEE754 values). GCC
11827 internally uses [0.5, 1) (see real.c), so the exponent returned from
11828 REAL_EXP must be altered. */
11829 exponent = 5 - exponent;
11830
11831 if (exponent < 0 || exponent > 7)
11832 return -1;
11833
11834 /* Sign, mantissa and exponent are now in the correct form to plug into the
11835 formula described in the comment above. */
11836 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11837 }
11838
11839 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11840 int
11841 vfp3_const_double_rtx (rtx x)
11842 {
11843 if (!TARGET_VFP3)
11844 return 0;
11845
11846 return vfp3_const_double_index (x) != -1;
11847 }
11848
11849 /* Recognize immediates which can be used in various Neon instructions. Legal
11850 immediates are described by the following table (for VMVN variants, the
11851 bitwise inverse of the constant shown is recognized. In either case, VMOV
11852 is output and the correct instruction to use for a given constant is chosen
11853 by the assembler). The constant shown is replicated across all elements of
11854 the destination vector.
11855
11856 insn elems variant constant (binary)
11857 ---- ----- ------- -----------------
11858 vmov i32 0 00000000 00000000 00000000 abcdefgh
11859 vmov i32 1 00000000 00000000 abcdefgh 00000000
11860 vmov i32 2 00000000 abcdefgh 00000000 00000000
11861 vmov i32 3 abcdefgh 00000000 00000000 00000000
11862 vmov i16 4 00000000 abcdefgh
11863 vmov i16 5 abcdefgh 00000000
11864 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11865 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11866 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11867 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11868 vmvn i16 10 00000000 abcdefgh
11869 vmvn i16 11 abcdefgh 00000000
11870 vmov i32 12 00000000 00000000 abcdefgh 11111111
11871 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11872 vmov i32 14 00000000 abcdefgh 11111111 11111111
11873 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11874 vmov i8 16 abcdefgh
11875 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11876 eeeeeeee ffffffff gggggggg hhhhhhhh
11877 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11878 vmov f32 19 00000000 00000000 00000000 00000000
11879
11880 For case 18, B = !b. Representable values are exactly those accepted by
11881 vfp3_const_double_index, but are output as floating-point numbers rather
11882 than indices.
11883
11884 For case 19, we will change it to vmov.i32 when assembling.
11885
11886 Variants 0-5 (inclusive) may also be used as immediates for the second
11887 operand of VORR/VBIC instructions.
11888
11889 The INVERSE argument causes the bitwise inverse of the given operand to be
11890 recognized instead (used for recognizing legal immediates for the VAND/VORN
11891 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11892 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11893 output, rather than the real insns vbic/vorr).
11894
11895 INVERSE makes no difference to the recognition of float vectors.
11896
11897 The return value is the variant of immediate as shown in the above table, or
11898 -1 if the given value doesn't match any of the listed patterns.
11899 */
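/* For example (little-endian), a V2SImode constant with both elements equal
   to 0x0000004b expands to the byte sequence 4b 00 00 00 4b 00 00 00, which
   matches variant 0 above with an element size of 32; *MODCONST is then set
   to 0x4b and *ELEMENTWIDTH to 32.  */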
11900 static int
11901 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11902 rtx *modconst, int *elementwidth)
11903 {
11904 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11905 matches = 1; \
11906 for (i = 0; i < idx; i += (STRIDE)) \
11907 if (!(TEST)) \
11908 matches = 0; \
11909 if (matches) \
11910 { \
11911 immtype = (CLASS); \
11912 elsize = (ELSIZE); \
11913 break; \
11914 }
11915
11916 unsigned int i, elsize = 0, idx = 0, n_elts;
11917 unsigned int innersize;
11918 unsigned char bytes[16];
11919 int immtype = -1, matches;
11920 unsigned int invmask = inverse ? 0xff : 0;
11921 bool vector = GET_CODE (op) == CONST_VECTOR;
11922
11923 if (vector)
11924 n_elts = CONST_VECTOR_NUNITS (op);
11925 else
11926 {
11927 n_elts = 1;
11928 if (mode == VOIDmode)
11929 mode = DImode;
11930 }
11931
11932 innersize = GET_MODE_UNIT_SIZE (mode);
11933
11934 /* Vectors of float constants. */
11935 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11936 {
11937 rtx el0 = CONST_VECTOR_ELT (op, 0);
11938
11939 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11940 return -1;
11941
11942 /* FP16 vectors cannot be represented. */
11943 if (GET_MODE_INNER (mode) == HFmode)
11944 return -1;
11945
11946 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11947 are distinct in this context. */
11948 if (!const_vec_duplicate_p (op))
11949 return -1;
11950
11951 if (modconst)
11952 *modconst = CONST_VECTOR_ELT (op, 0);
11953
11954 if (elementwidth)
11955 *elementwidth = 0;
11956
11957 if (el0 == CONST0_RTX (GET_MODE (el0)))
11958 return 19;
11959 else
11960 return 18;
11961 }
11962
11963 /* The tricks done in the code below apply for little-endian vector layout.
11964 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11965 FIXME: Implement logic for big-endian vectors. */
11966 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11967 return -1;
11968
11969 /* Splat vector constant out into a byte vector. */
11970 for (i = 0; i < n_elts; i++)
11971 {
11972 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11973 unsigned HOST_WIDE_INT elpart;
11974
11975 gcc_assert (CONST_INT_P (el));
11976 elpart = INTVAL (el);
11977
11978 for (unsigned int byte = 0; byte < innersize; byte++)
11979 {
11980 bytes[idx++] = (elpart & 0xff) ^ invmask;
11981 elpart >>= BITS_PER_UNIT;
11982 }
11983 }
11984
11985 /* Sanity check. */
11986 gcc_assert (idx == GET_MODE_SIZE (mode));
11987
11988 do
11989 {
11990 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11991 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11992
11993 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11994 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11995
11996 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11997 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11998
11999 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12000 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12001
12002 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12003
12004 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12005
12006 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12007 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12008
12009 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12010 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12011
12012 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12013 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12014
12015 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12016 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12017
12018 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12019
12020 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12021
12022 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12023 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12024
12025 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12026 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12027
12028 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12029 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12030
12031 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12033
12034 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12035
12036 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12037 && bytes[i] == bytes[(i + 8) % idx]);
12038 }
12039 while (0);
12040
12041 if (immtype == -1)
12042 return -1;
12043
12044 if (elementwidth)
12045 *elementwidth = elsize;
12046
12047 if (modconst)
12048 {
12049 unsigned HOST_WIDE_INT imm = 0;
12050
12051 /* Un-invert bytes of recognized vector, if necessary. */
12052 if (invmask != 0)
12053 for (i = 0; i < idx; i++)
12054 bytes[i] ^= invmask;
12055
12056 if (immtype == 17)
12057 {
12058 /* FIXME: Broken on 32-bit H_W_I hosts. */
12059 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12060
12061 for (i = 0; i < 8; i++)
12062 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12063 << (i * BITS_PER_UNIT);
12064
12065 *modconst = GEN_INT (imm);
12066 }
12067 else
12068 {
12069 unsigned HOST_WIDE_INT imm = 0;
12070
12071 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12072 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12073
12074 *modconst = GEN_INT (imm);
12075 }
12076 }
12077
12078 return immtype;
12079 #undef CHECK
12080 }
12081
12082 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12083 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12084 float elements), and a modified constant (whatever should be output for a
12085 VMOV) in *MODCONST. */
12086
12087 int
12088 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12089 rtx *modconst, int *elementwidth)
12090 {
12091 rtx tmpconst;
12092 int tmpwidth;
12093 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12094
12095 if (retval == -1)
12096 return 0;
12097
12098 if (modconst)
12099 *modconst = tmpconst;
12100
12101 if (elementwidth)
12102 *elementwidth = tmpwidth;
12103
12104 return 1;
12105 }
12106
12107 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12108 the immediate is valid, write a constant suitable for using as an operand
12109 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12110 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12111
12112 int
12113 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12114 rtx *modconst, int *elementwidth)
12115 {
12116 rtx tmpconst;
12117 int tmpwidth;
12118 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12119
12120 if (retval < 0 || retval > 5)
12121 return 0;
12122
12123 if (modconst)
12124 *modconst = tmpconst;
12125
12126 if (elementwidth)
12127 *elementwidth = tmpwidth;
12128
12129 return 1;
12130 }
12131
12132 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12133 the immediate is valid, write a constant suitable for using as an operand
12134 to VSHR/VSHL to *MODCONST and the corresponding element width to
12135 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12136 shift, because the two have different range limitations. */
12137
12138 int
12139 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12140 rtx *modconst, int *elementwidth,
12141 bool isleftshift)
12142 {
12143 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12144 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12145 unsigned HOST_WIDE_INT last_elt = 0;
12146 unsigned HOST_WIDE_INT maxshift;
12147
12148 /* Extract the element values; all elements must hold the same shift amount. */
12149 for (i = 0; i < n_elts; i++)
12150 {
12151 rtx el = CONST_VECTOR_ELT (op, i);
12152 unsigned HOST_WIDE_INT elpart;
12153
12154 if (CONST_INT_P (el))
12155 elpart = INTVAL (el);
12156 else if (CONST_DOUBLE_P (el))
12157 return 0;
12158 else
12159 gcc_unreachable ();
12160
12161 if (i != 0 && elpart != last_elt)
12162 return 0;
12163
12164 last_elt = elpart;
12165 }
12166
12167 /* Shift less than element size. */
12168 maxshift = innersize * 8;
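  /* For example, with 8-bit elements maxshift is 8: a left-shift immediate
     must be in the range 0..7, a right-shift immediate in the range 1..8.  */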
12169
12170 if (isleftshift)
12171 {
12172 /* Left shift immediate value can be from 0 to <size>-1. */
12173 if (last_elt >= maxshift)
12174 return 0;
12175 }
12176 else
12177 {
12178 /* Right shift immediate value can be from 1 to <size>. */
12179 if (last_elt == 0 || last_elt > maxshift)
12180 return 0;
12181 }
12182
12183 if (elementwidth)
12184 *elementwidth = innersize * 8;
12185
12186 if (modconst)
12187 *modconst = CONST_VECTOR_ELT (op, 0);
12188
12189 return 1;
12190 }
12191
12192 /* Return a string suitable for output of Neon immediate logic operation
12193 MNEM. */
12194
12195 char *
12196 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12197 int inverse, int quad)
12198 {
12199 int width, is_valid;
12200 static char templ[40];
12201
12202 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12203
12204 gcc_assert (is_valid != 0);
12205
12206 if (quad)
12207 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12208 else
12209 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12210
12211 return templ;
12212 }
12213
12214 /* Return a string suitable for output of Neon immediate shift operation
12215 (VSHR or VSHL) MNEM. */
12216
12217 char *
12218 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12219 machine_mode mode, int quad,
12220 bool isleftshift)
12221 {
12222 int width, is_valid;
12223 static char templ[40];
12224
12225 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12226 gcc_assert (is_valid != 0);
12227
12228 if (quad)
12229 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12230 else
12231 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12232
12233 return templ;
12234 }
12235
12236 /* Output a sequence of pairwise operations to implement a reduction.
12237 NOTE: We do "too much work" here, because pairwise operations work on two
12238 registers-worth of operands in one go. Unfortunately it does not seem
12239 possible to exploit those extra calculations to do the full operation in fewer steps.
12240 Although all vector elements of the result but the first are ignored, we
12241 actually calculate the same result in each of the elements. An alternative
12242 such as initially loading a vector with zero to use as each of the second
12243 operands would use up an additional register and take an extra instruction,
12244 for no particular gain. */
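/* For example, an add reduction of a V4SImode vector takes two steps below:
   the first pairwise operation leaves each pair-sum duplicated across the
   lanes, and the second combines them so that every lane of OP0 holds the
   full reduction, of which only the first lane is subsequently used.  */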
12245
12246 void
12247 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12248 rtx (*reduc) (rtx, rtx, rtx))
12249 {
12250 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12251 rtx tmpsum = op1;
12252
12253 for (i = parts / 2; i >= 1; i /= 2)
12254 {
12255 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12256 emit_insn (reduc (dest, tmpsum, tmpsum));
12257 tmpsum = dest;
12258 }
12259 }
12260
12261 /* If VALS is a vector constant that can be loaded into a register
12262 using VDUP, generate instructions to do so and return an RTX to
12263 assign to the register. Otherwise return NULL_RTX. */
12264
12265 static rtx
12266 neon_vdup_constant (rtx vals)
12267 {
12268 machine_mode mode = GET_MODE (vals);
12269 machine_mode inner_mode = GET_MODE_INNER (mode);
12270 rtx x;
12271
12272 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12273 return NULL_RTX;
12274
12275 if (!const_vec_duplicate_p (vals, &x))
12276 /* The elements are not all the same. We could handle repeating
12277 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12278 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12279 vdup.i16). */
12280 return NULL_RTX;
12281
12282 /* We can load this constant by using VDUP and a constant in a
12283 single ARM register. This will be cheaper than a vector
12284 load. */
12285
12286 x = copy_to_mode_reg (inner_mode, x);
12287 return gen_vec_duplicate (mode, x);
12288 }
12289
12290 /* Generate code to load VALS, which is a PARALLEL containing only
12291 constants (for vec_init) or CONST_VECTOR, efficiently into a
12292 register. Returns an RTX to copy into the register, or NULL_RTX
12293 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12294
12295 rtx
12296 neon_make_constant (rtx vals)
12297 {
12298 machine_mode mode = GET_MODE (vals);
12299 rtx target;
12300 rtx const_vec = NULL_RTX;
12301 int n_elts = GET_MODE_NUNITS (mode);
12302 int n_const = 0;
12303 int i;
12304
12305 if (GET_CODE (vals) == CONST_VECTOR)
12306 const_vec = vals;
12307 else if (GET_CODE (vals) == PARALLEL)
12308 {
12309 /* A CONST_VECTOR must contain only CONST_INTs and
12310 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12311 Only store valid constants in a CONST_VECTOR. */
12312 for (i = 0; i < n_elts; ++i)
12313 {
12314 rtx x = XVECEXP (vals, 0, i);
12315 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12316 n_const++;
12317 }
12318 if (n_const == n_elts)
12319 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12320 }
12321 else
12322 gcc_unreachable ();
12323
12324 if (const_vec != NULL
12325 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12326 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12327 return const_vec;
12328 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12329 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12330 pipeline cycle; creating the constant takes one or two ARM
12331 pipeline cycles. */
12332 return target;
12333 else if (const_vec != NULL_RTX)
12334 /* Load from constant pool. On Cortex-A8 this takes two cycles
12335 (for either double or quad vectors). We can not take advantage
12336 of single-cycle VLD1 because we need a PC-relative addressing
12337 mode. */
12338 return const_vec;
12339 else
12340 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12341 We can not construct an initializer. */
12342 return NULL_RTX;
12343 }
12344
12345 /* Initialize vector TARGET to VALS. */
12346
12347 void
12348 neon_expand_vector_init (rtx target, rtx vals)
12349 {
12350 machine_mode mode = GET_MODE (target);
12351 machine_mode inner_mode = GET_MODE_INNER (mode);
12352 int n_elts = GET_MODE_NUNITS (mode);
12353 int n_var = 0, one_var = -1;
12354 bool all_same = true;
12355 rtx x, mem;
12356 int i;
12357
12358 for (i = 0; i < n_elts; ++i)
12359 {
12360 x = XVECEXP (vals, 0, i);
12361 if (!CONSTANT_P (x))
12362 ++n_var, one_var = i;
12363
12364 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12365 all_same = false;
12366 }
12367
12368 if (n_var == 0)
12369 {
12370 rtx constant = neon_make_constant (vals);
12371 if (constant != NULL_RTX)
12372 {
12373 emit_move_insn (target, constant);
12374 return;
12375 }
12376 }
12377
12378 /* Splat a single non-constant element if we can. */
12379 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12380 {
12381 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12382 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12383 return;
12384 }
12385
12386 /* One field is non-constant. Load constant then overwrite varying
12387 field. This is more efficient than using the stack. */
12388 if (n_var == 1)
12389 {
12390 rtx copy = copy_rtx (vals);
12391 rtx index = GEN_INT (one_var);
12392
12393 /* Load constant part of vector, substitute neighboring value for
12394 varying element. */
12395 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12396 neon_expand_vector_init (target, copy);
12397
12398 /* Insert variable. */
12399 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12400 switch (mode)
12401 {
12402 case E_V8QImode:
12403 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12404 break;
12405 case E_V16QImode:
12406 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12407 break;
12408 case E_V4HImode:
12409 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12410 break;
12411 case E_V8HImode:
12412 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12413 break;
12414 case E_V2SImode:
12415 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12416 break;
12417 case E_V4SImode:
12418 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12419 break;
12420 case E_V2SFmode:
12421 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12422 break;
12423 case E_V4SFmode:
12424 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12425 break;
12426 case E_V2DImode:
12427 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12428 break;
12429 default:
12430 gcc_unreachable ();
12431 }
12432 return;
12433 }
12434
12435 /* Construct the vector in memory one field at a time
12436 and load the whole vector. */
12437 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12438 for (i = 0; i < n_elts; i++)
12439 emit_move_insn (adjust_address_nv (mem, inner_mode,
12440 i * GET_MODE_SIZE (inner_mode)),
12441 XVECEXP (vals, 0, i));
12442 emit_move_insn (target, mem);
12443 }
12444
12445 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Issue
12446 an error mentioning DESC if it doesn't. EXP indicates the source location,
12447 which includes the inlining history for intrinsics. */
12448
12449 static void
12450 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12451 const_tree exp, const char *desc)
12452 {
12453 HOST_WIDE_INT lane;
12454
12455 gcc_assert (CONST_INT_P (operand));
12456
12457 lane = INTVAL (operand);
12458
12459 if (lane < low || lane >= high)
12460 {
12461 if (exp)
12462 error ("%K%s %wd out of range %wd - %wd",
12463 exp, desc, lane, low, high - 1);
12464 else
12465 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12466 }
12467 }
12468
12469 /* Bounds-check lanes. */
12470
12471 void
12472 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12473 const_tree exp)
12474 {
12475 bounds_check (operand, low, high, exp, "lane");
12476 }
12477
12478 /* Bounds-check constants. */
12479
12480 void
12481 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12482 {
12483 bounds_check (operand, low, high, NULL_TREE, "constant");
12484 }
12485
12486 HOST_WIDE_INT
12487 neon_element_bits (machine_mode mode)
12488 {
12489 return GET_MODE_UNIT_BITSIZE (mode);
12490 }
12491
12492 \f
12493 /* Predicates for `match_operand' and `match_operator'. */
12494
12495 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12496 WB is true if full writeback address modes are allowed and is false
12497 if limited writeback address modes (POST_INC and PRE_DEC) are
12498 allowed. */
12499
12500 int
12501 arm_coproc_mem_operand (rtx op, bool wb)
12502 {
12503 rtx ind;
12504
12505 /* Reject eliminable registers. */
12506 if (! (reload_in_progress || reload_completed || lra_in_progress)
12507 && ( reg_mentioned_p (frame_pointer_rtx, op)
12508 || reg_mentioned_p (arg_pointer_rtx, op)
12509 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12510 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12511 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12512 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12513 return FALSE;
12514
12515 /* Constants are converted into offsets from labels. */
12516 if (!MEM_P (op))
12517 return FALSE;
12518
12519 ind = XEXP (op, 0);
12520
12521 if (reload_completed
12522 && (GET_CODE (ind) == LABEL_REF
12523 || (GET_CODE (ind) == CONST
12524 && GET_CODE (XEXP (ind, 0)) == PLUS
12525 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12526 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12527 return TRUE;
12528
12529 /* Match: (mem (reg)). */
12530 if (REG_P (ind))
12531 return arm_address_register_rtx_p (ind, 0);
12532
12533 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12534 acceptable in any case (subject to verification by
12535 arm_address_register_rtx_p). We need WB to be true to accept
12536 PRE_INC and POST_DEC. */
12537 if (GET_CODE (ind) == POST_INC
12538 || GET_CODE (ind) == PRE_DEC
12539 || (wb
12540 && (GET_CODE (ind) == PRE_INC
12541 || GET_CODE (ind) == POST_DEC)))
12542 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12543
12544 if (wb
12545 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12546 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12547 && GET_CODE (XEXP (ind, 1)) == PLUS
12548 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12549 ind = XEXP (ind, 1);
12550
12551 /* Match:
12552 (plus (reg)
12553 (const)). */
12554 if (GET_CODE (ind) == PLUS
12555 && REG_P (XEXP (ind, 0))
12556 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12557 && CONST_INT_P (XEXP (ind, 1))
12558 && INTVAL (XEXP (ind, 1)) > -1024
12559 && INTVAL (XEXP (ind, 1)) < 1024
12560 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12561 return TRUE;
12562
12563 return FALSE;
12564 }
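/* For illustration (assumed examples of the forms accepted above):

     vldr  d0, [r1]            @ (mem (reg r1))
     vldr  d0, [r1, #1016]     @ (mem (plus (reg r1) (const_int 1016)))

   i.e. a plain register base, the limited auto-increment forms
   (POST_INC/PRE_DEC always, PRE_INC/POST_DEC and the MODIFY forms only
   when WB is true), and reg+imm with the immediate a multiple of 4 in the
   range -1020..1020, matching the VLDR/VSTR offset encoding.  */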
12565
12566 /* Return TRUE if OP is a memory operand which we can load or store a vector
12567 to/from. TYPE is one of the following values:
12568 0 - Vector load/store (vldr)
12569 1 - Core registers (ldm)
12570 2 - Element/structure loads (vld1)
12571 */
12572 int
12573 neon_vector_mem_operand (rtx op, int type, bool strict)
12574 {
12575 rtx ind;
12576
12577 /* Reject eliminable registers. */
12578 if (strict && ! (reload_in_progress || reload_completed)
12579 && (reg_mentioned_p (frame_pointer_rtx, op)
12580 || reg_mentioned_p (arg_pointer_rtx, op)
12581 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12582 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12583 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12584 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12585 return FALSE;
12586
12587 /* Constants are converted into offsets from labels. */
12588 if (!MEM_P (op))
12589 return FALSE;
12590
12591 ind = XEXP (op, 0);
12592
12593 if (reload_completed
12594 && (GET_CODE (ind) == LABEL_REF
12595 || (GET_CODE (ind) == CONST
12596 && GET_CODE (XEXP (ind, 0)) == PLUS
12597 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12598 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12599 return TRUE;
12600
12601 /* Match: (mem (reg)). */
12602 if (REG_P (ind))
12603 return arm_address_register_rtx_p (ind, 0);
12604
12605 /* Allow post-increment with Neon registers. */
12606 if ((type != 1 && GET_CODE (ind) == POST_INC)
12607 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12608 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12609
12610 /* Allow post-increment by register for VLDn. */
12611 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12612 && GET_CODE (XEXP (ind, 1)) == PLUS
12613 && REG_P (XEXP (XEXP (ind, 1), 1)))
12614 return true;
12615
12616 /* Match:
12617 (plus (reg)
12618 (const)). */
12619 if (type == 0
12620 && GET_CODE (ind) == PLUS
12621 && REG_P (XEXP (ind, 0))
12622 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12623 && CONST_INT_P (XEXP (ind, 1))
12624 && INTVAL (XEXP (ind, 1)) > -1024
12625 /* For quad modes, we restrict the constant offset to be slightly less
12626 than what the instruction format permits. We have no such constraint
12627 on double mode offsets. (This must match arm_legitimate_index_p.) */
12628 && (INTVAL (XEXP (ind, 1))
12629 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12630 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12631 return TRUE;
12632
12633 return FALSE;
12634 }
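/* Illustrative summary (assumed) of the address forms accepted per TYPE:

     TYPE 0 (vldr):  [rN], [rN, #imm] with imm a multiple of 4, POST_INC,
                     PRE_DEC
     TYPE 1 (ldm):   [rN] only
     TYPE 2 (vld1):  [rN], POST_INC, or POST_MODIFY by a register
                     ("[rN], rM")

   The quad-register offset limit of 1016 (rather than 1024) presumably
   keeps the second D-register half of the access, at offset + 8, within
   the +/-1020 range; see arm_legitimate_index_p.  */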
12635
12636 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12637 type. */
12638 int
12639 neon_struct_mem_operand (rtx op)
12640 {
12641 rtx ind;
12642
12643 /* Reject eliminable registers. */
12644 if (! (reload_in_progress || reload_completed)
12645 && ( reg_mentioned_p (frame_pointer_rtx, op)
12646 || reg_mentioned_p (arg_pointer_rtx, op)
12647 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12648 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12649 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12650 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12651 return FALSE;
12652
12653 /* Constants are converted into offsets from labels. */
12654 if (!MEM_P (op))
12655 return FALSE;
12656
12657 ind = XEXP (op, 0);
12658
12659 if (reload_completed
12660 && (GET_CODE (ind) == LABEL_REF
12661 || (GET_CODE (ind) == CONST
12662 && GET_CODE (XEXP (ind, 0)) == PLUS
12663 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12664 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12665 return TRUE;
12666
12667 /* Match: (mem (reg)). */
12668 if (REG_P (ind))
12669 return arm_address_register_rtx_p (ind, 0);
12670
12671 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12672 if (GET_CODE (ind) == POST_INC
12673 || GET_CODE (ind) == PRE_DEC)
12674 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12675
12676 return FALSE;
12677 }
12678
12679 /* Return true if X is a register that will be eliminated later on. */
12680 int
12681 arm_eliminable_register (rtx x)
12682 {
12683 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12684 || REGNO (x) == ARG_POINTER_REGNUM
12685 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12686 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12687 }
12688
12689 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12690 coprocessor registers. Otherwise return NO_REGS. */
12691
12692 enum reg_class
12693 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12694 {
12695 if (mode == HFmode)
12696 {
12697 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12698 return GENERAL_REGS;
12699 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12700 return NO_REGS;
12701 return GENERAL_REGS;
12702 }
12703
12704 /* The neon move patterns handle all legitimate vector and struct
12705 addresses. */
12706 if (TARGET_NEON
12707 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12708 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12709 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12710 || VALID_NEON_STRUCT_MODE (mode)))
12711 return NO_REGS;
12712
12713 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12714 return NO_REGS;
12715
12716 return GENERAL_REGS;
12717 }
12718
12719 /* Values which must be returned in the most-significant end of the return
12720 register. */
12721
12722 static bool
12723 arm_return_in_msb (const_tree valtype)
12724 {
12725 return (TARGET_AAPCS_BASED
12726 && BYTES_BIG_ENDIAN
12727 && (AGGREGATE_TYPE_P (valtype)
12728 || TREE_CODE (valtype) == COMPLEX_TYPE
12729 || FIXED_POINT_TYPE_P (valtype)));
12730 }
12731
12732 /* Return TRUE if X references a SYMBOL_REF. */
12733 int
12734 symbol_mentioned_p (rtx x)
12735 {
12736 const char * fmt;
12737 int i;
12738
12739 if (GET_CODE (x) == SYMBOL_REF)
12740 return 1;
12741
12742 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12743 are constant offsets, not symbols. */
12744 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12745 return 0;
12746
12747 fmt = GET_RTX_FORMAT (GET_CODE (x));
12748
12749 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12750 {
12751 if (fmt[i] == 'E')
12752 {
12753 int j;
12754
12755 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12756 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12757 return 1;
12758 }
12759 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12760 return 1;
12761 }
12762
12763 return 0;
12764 }
12765
12766 /* Return TRUE if X references a LABEL_REF. */
12767 int
12768 label_mentioned_p (rtx x)
12769 {
12770 const char * fmt;
12771 int i;
12772
12773 if (GET_CODE (x) == LABEL_REF)
12774 return 1;
12775
12776 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12777 instruction, but they are constant offsets, not symbols. */
12778 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12779 return 0;
12780
12781 fmt = GET_RTX_FORMAT (GET_CODE (x));
12782 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12783 {
12784 if (fmt[i] == 'E')
12785 {
12786 int j;
12787
12788 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12789 if (label_mentioned_p (XVECEXP (x, i, j)))
12790 return 1;
12791 }
12792 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12793 return 1;
12794 }
12795
12796 return 0;
12797 }
12798
12799 int
12800 tls_mentioned_p (rtx x)
12801 {
12802 switch (GET_CODE (x))
12803 {
12804 case CONST:
12805 return tls_mentioned_p (XEXP (x, 0));
12806
12807 case UNSPEC:
12808 if (XINT (x, 1) == UNSPEC_TLS)
12809 return 1;
12810
12811 /* Fall through. */
12812 default:
12813 return 0;
12814 }
12815 }
12816
12817 /* Must not copy any rtx that uses a pc-relative address.
12818 Also, disallow copying of load-exclusive instructions that
12819 may appear after splitting of compare-and-swap-style operations
12820 so as to prevent those loops from being transformed away from their
12821 canonical forms (see PR 69904). */
12822
12823 static bool
12824 arm_cannot_copy_insn_p (rtx_insn *insn)
12825 {
12826 /* The tls call insn cannot be copied, as it is paired with a data
12827 word. */
12828 if (recog_memoized (insn) == CODE_FOR_tlscall)
12829 return true;
12830
12831 subrtx_iterator::array_type array;
12832 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12833 {
12834 const_rtx x = *iter;
12835 if (GET_CODE (x) == UNSPEC
12836 && (XINT (x, 1) == UNSPEC_PIC_BASE
12837 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12838 return true;
12839 }
12840
12841 rtx set = single_set (insn);
12842 if (set)
12843 {
12844 rtx src = SET_SRC (set);
12845 if (GET_CODE (src) == ZERO_EXTEND)
12846 src = XEXP (src, 0);
12847
12848 /* Catch the load-exclusive and load-acquire operations. */
12849 if (GET_CODE (src) == UNSPEC_VOLATILE
12850 && (XINT (src, 1) == VUNSPEC_LL
12851 || XINT (src, 1) == VUNSPEC_LAX))
12852 return true;
12853 }
12854 return false;
12855 }
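/* Background illustration (assumed): a C11 atomic operation such as

     #include <stdatomic.h>
     void f (atomic_int *p) { atomic_fetch_add (p, 1); }

   is implemented on ARM with an LDREX/STREX retry loop once the
   compare-and-swap style patterns have been split.  Copying the
   load-exclusive (VUNSPEC_LL) half of such a loop could separate it from
   its matching store-exclusive and destroy the loop's canonical form,
   which is what the check above prevents (see PR 69904).  */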
12856
12857 enum rtx_code
12858 minmax_code (rtx x)
12859 {
12860 enum rtx_code code = GET_CODE (x);
12861
12862 switch (code)
12863 {
12864 case SMAX:
12865 return GE;
12866 case SMIN:
12867 return LE;
12868 case UMIN:
12869 return LEU;
12870 case UMAX:
12871 return GEU;
12872 default:
12873 gcc_unreachable ();
12874 }
12875 }
12876
12877 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12878
12879 bool
12880 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12881 int *mask, bool *signed_sat)
12882 {
12883 /* The high bound must be a power of two minus one. */
12884 int log = exact_log2 (INTVAL (hi_bound) + 1);
12885 if (log == -1)
12886 return false;
12887
12888 /* The low bound is either zero (for usat) or one less than the
12889 negation of the high bound (for ssat). */
12890 if (INTVAL (lo_bound) == 0)
12891 {
12892 if (mask)
12893 *mask = log;
12894 if (signed_sat)
12895 *signed_sat = false;
12896
12897 return true;
12898 }
12899
12900 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12901 {
12902 if (mask)
12903 *mask = log + 1;
12904 if (signed_sat)
12905 *signed_sat = true;
12906
12907 return true;
12908 }
12909
12910 return false;
12911 }
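/* Worked example (illustrative): a clamp to the signed 8-bit range,

     r = x < -128 ? -128 : x > 127 ? 127 : x;

   is typically presented to this function as a min/max pair with
   HI_BOUND == 127 and LO_BOUND == -128, so log == 7, the signed case
   matches and *MASK becomes 8, i.e. an "ssat rD, #8, rN" instruction.
   With LO_BOUND == 0 and HI_BOUND == 255 the unsigned case likewise gives
   *MASK == 8 for "usat rD, #8, rN".  */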
12912
12913 /* Return 1 if memory locations are adjacent. */
12914 int
12915 adjacent_mem_locations (rtx a, rtx b)
12916 {
12917 /* We don't guarantee to preserve the order of these memory refs. */
12918 if (volatile_refs_p (a) || volatile_refs_p (b))
12919 return 0;
12920
12921 if ((REG_P (XEXP (a, 0))
12922 || (GET_CODE (XEXP (a, 0)) == PLUS
12923 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12924 && (REG_P (XEXP (b, 0))
12925 || (GET_CODE (XEXP (b, 0)) == PLUS
12926 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12927 {
12928 HOST_WIDE_INT val0 = 0, val1 = 0;
12929 rtx reg0, reg1;
12930 int val_diff;
12931
12932 if (GET_CODE (XEXP (a, 0)) == PLUS)
12933 {
12934 reg0 = XEXP (XEXP (a, 0), 0);
12935 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12936 }
12937 else
12938 reg0 = XEXP (a, 0);
12939
12940 if (GET_CODE (XEXP (b, 0)) == PLUS)
12941 {
12942 reg1 = XEXP (XEXP (b, 0), 0);
12943 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12944 }
12945 else
12946 reg1 = XEXP (b, 0);
12947
12948 /* Don't accept any offset that will require multiple
12949 instructions to handle, since this would cause the
12950 arith_adjacentmem pattern to output an overlong sequence. */
12951 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12952 return 0;
12953
12954 /* Don't allow an eliminable register: register elimination can make
12955 the offset too large. */
12956 if (arm_eliminable_register (reg0))
12957 return 0;
12958
12959 val_diff = val1 - val0;
12960
12961 if (arm_ld_sched)
12962 {
12963 /* If the target has load delay slots, then there's no benefit
12964 to using an ldm instruction unless the offset is zero and
12965 we are optimizing for size. */
12966 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12967 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12968 && (val_diff == 4 || val_diff == -4));
12969 }
12970
12971 return ((REGNO (reg0) == REGNO (reg1))
12972 && (val_diff == 4 || val_diff == -4));
12973 }
12974
12975 return 0;
12976 }
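/* For illustration (assumed): the pair of accesses

     ldr r4, [r6, #8]
     ldr r5, [r6, #12]

   is adjacent in the sense used here: same base register, offsets
   differing by exactly 4 and neither offset needing multiple instructions
   to form.  On a target with load delay slots (arm_ld_sched) the result is
   further restricted to the small-offset, optimize-for-size case described
   in the code above.  */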
12977
12978 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12979 for load operations, false for store operations. CONSECUTIVE is true
12980 if the register numbers in the operation must be consecutive in the register
12981 bank. RETURN_PC is true if the value is to be loaded into the PC.
12982 The pattern we are trying to match for load is:
12983 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12984 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12985 :
12986 :
12987 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12988 ]
12989 where
12990 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12991 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12992 3. If consecutive is TRUE, then for kth register being loaded,
12993 REGNO (R_dk) = REGNO (R_d0) + k.
12994 The pattern for store is similar. */
12995 bool
12996 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12997 bool consecutive, bool return_pc)
12998 {
12999 HOST_WIDE_INT count = XVECLEN (op, 0);
13000 rtx reg, mem, addr;
13001 unsigned regno;
13002 unsigned first_regno;
13003 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13004 rtx elt;
13005 bool addr_reg_in_reglist = false;
13006 bool update = false;
13007 int reg_increment;
13008 int offset_adj;
13009 int regs_per_val;
13010
13011 /* If not in SImode, then registers must be consecutive
13012 (e.g., VLDM instructions for DFmode). */
13013 gcc_assert ((mode == SImode) || consecutive);
13014 /* Setting return_pc for stores is illegal. */
13015 gcc_assert (!return_pc || load);
13016
13017 /* Set up the increments and the regs per val based on the mode. */
13018 reg_increment = GET_MODE_SIZE (mode);
13019 regs_per_val = reg_increment / 4;
13020 offset_adj = return_pc ? 1 : 0;
13021
13022 if (count <= 1
13023 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13024 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13025 return false;
13026
13027 /* Check if this is a write-back. */
13028 elt = XVECEXP (op, 0, offset_adj);
13029 if (GET_CODE (SET_SRC (elt)) == PLUS)
13030 {
13031 i++;
13032 base = 1;
13033 update = true;
13034
13035 /* The offset adjustment must be the number of registers being
13036 popped times the size of a single register. */
13037 if (!REG_P (SET_DEST (elt))
13038 || !REG_P (XEXP (SET_SRC (elt), 0))
13039 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13040 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13041 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13042 ((count - 1 - offset_adj) * reg_increment))
13043 return false;
13044 }
13045
13046 i = i + offset_adj;
13047 base = base + offset_adj;
13048 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13049 success depends on the type: VLDM can do just one reg,
13050 LDM must do at least two. */
13051 if ((count <= i) && (mode == SImode))
13052 return false;
13053
13054 elt = XVECEXP (op, 0, i - 1);
13055 if (GET_CODE (elt) != SET)
13056 return false;
13057
13058 if (load)
13059 {
13060 reg = SET_DEST (elt);
13061 mem = SET_SRC (elt);
13062 }
13063 else
13064 {
13065 reg = SET_SRC (elt);
13066 mem = SET_DEST (elt);
13067 }
13068
13069 if (!REG_P (reg) || !MEM_P (mem))
13070 return false;
13071
13072 regno = REGNO (reg);
13073 first_regno = regno;
13074 addr = XEXP (mem, 0);
13075 if (GET_CODE (addr) == PLUS)
13076 {
13077 if (!CONST_INT_P (XEXP (addr, 1)))
13078 return false;
13079
13080 offset = INTVAL (XEXP (addr, 1));
13081 addr = XEXP (addr, 0);
13082 }
13083
13084 if (!REG_P (addr))
13085 return false;
13086
13087 /* Don't allow SP to be loaded unless it is also the base register. It
13088 guarantees that SP is reset correctly when an LDM instruction
13089 is interrupted. Otherwise, we might end up with a corrupt stack. */
13090 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13091 return false;
13092
13093 for (; i < count; i++)
13094 {
13095 elt = XVECEXP (op, 0, i);
13096 if (GET_CODE (elt) != SET)
13097 return false;
13098
13099 if (load)
13100 {
13101 reg = SET_DEST (elt);
13102 mem = SET_SRC (elt);
13103 }
13104 else
13105 {
13106 reg = SET_SRC (elt);
13107 mem = SET_DEST (elt);
13108 }
13109
13110 if (!REG_P (reg)
13111 || GET_MODE (reg) != mode
13112 || REGNO (reg) <= regno
13113 || (consecutive
13114 && (REGNO (reg) !=
13115 (unsigned int) (first_regno + regs_per_val * (i - base))))
13116 /* Don't allow SP to be loaded unless it is also the base register. It
13117 guarantees that SP is reset correctly when an LDM instruction
13118 is interrupted. Otherwise, we might end up with a corrupt stack. */
13119 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13120 || !MEM_P (mem)
13121 || GET_MODE (mem) != mode
13122 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13123 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13124 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13125 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13126 offset + (i - base) * reg_increment))
13127 && (!REG_P (XEXP (mem, 0))
13128 || offset + (i - base) * reg_increment != 0)))
13129 return false;
13130
13131 regno = REGNO (reg);
13132 if (regno == REGNO (addr))
13133 addr_reg_in_reglist = true;
13134 }
13135
13136 if (load)
13137 {
13138 if (update && addr_reg_in_reglist)
13139 return false;
13140
13141 /* For Thumb-1, the address register is always modified, either by write-back
13142 or by explicit load. If the pattern does not describe an update,
13143 then the address register must be in the list of loaded registers. */
13144 if (TARGET_THUMB1)
13145 return update || addr_reg_in_reglist;
13146 }
13147
13148 return true;
13149 }
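/* Schematic example (assumed, simplified RTL): a two-register pop with
   write-back of the stack pointer would be presented roughly as

     (parallel
       [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
        (set (reg:SI r4) (mem:SI (reg:SI sp)))
        (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])

   The first element is recognized as the update (8 == 2 * reg_increment
   for SImode) and the remaining elements must load ascending register
   numbers from offsets that ascend by reg_increment, as checked in the
   loop above.  */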
13150
13151 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13152 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13153 instruction. ADD_OFFSET is nonzero if the base address register needs
13154 to be modified with an add instruction before we can use it. */
13155
13156 static bool
13157 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13158 int nops, HOST_WIDE_INT add_offset)
13159 {
13160 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13161 if the offset isn't small enough. The reason 2 ldrs are faster
13162 is because these ARMs are able to do more than one cache access
13163 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13164 whilst the ARM8 has a double bandwidth cache. This means that
13165 these cores can do both an instruction fetch and a data fetch in
13166 a single cycle, so the trick of calculating the address into a
13167 scratch register (one of the result regs) and then doing a load
13168 multiple actually becomes slower (and no smaller in code size).
13169 That is the transformation
13170
13171 ldr rd1, [rbase + offset]
13172 ldr rd2, [rbase + offset + 4]
13173
13174 to
13175
13176 add rd1, rbase, offset
13177 ldmia rd1, {rd1, rd2}
13178
13179 produces worse code -- '3 cycles + any stalls on rd2' instead of
13180 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13181 access per cycle, the first sequence could never complete in less
13182 than 6 cycles, whereas the ldm sequence would only take 5 and
13183 would make better use of sequential accesses if not hitting the
13184 cache.
13185
13186 We cheat here and test 'arm_ld_sched' which we currently know to
13187 only be true for the ARM8, ARM9 and StrongARM. If this ever
13188 changes, then the test below needs to be reworked. */
13189 if (nops == 2 && arm_ld_sched && add_offset != 0)
13190 return false;
13191
13192 /* XScale has load-store double instructions, but they have stricter
13193 alignment requirements than load-store multiple, so we cannot
13194 use them.
13195
13196 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13197 the pipeline until completion.
13198
13199 NREGS CYCLES
13200 1 3
13201 2 4
13202 3 5
13203 4 6
13204
13205 An ldr instruction takes 1-3 cycles, but does not block the
13206 pipeline.
13207
13208 NREGS CYCLES
13209 1 1-3
13210 2 2-6
13211 3 3-9
13212 4 4-12
13213
13214 Best case ldr will always win. However, the more ldr instructions
13215 we issue, the less likely we are to be able to schedule them well.
13216 Using ldr instructions also increases code size.
13217
13218 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13219 for counts of 3 or 4 regs. */
13220 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13221 return false;
13222 return true;
13223 }
13224
13225 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13226 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13227 an array ORDER which describes the sequence to use when accessing the
13228 offsets that produces an ascending order. In this sequence, each
13229 offset must be larger by exactly 4 than the previous one. ORDER[0]
13230 must have been filled in with the lowest offset by the caller.
13231 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13232 we use to verify that ORDER produces an ascending order of registers.
13233 Return true if it was possible to construct such an order, false if
13234 not. */
13235
13236 static bool
13237 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13238 int *unsorted_regs)
13239 {
13240 int i;
13241 for (i = 1; i < nops; i++)
13242 {
13243 int j;
13244
13245 order[i] = order[i - 1];
13246 for (j = 0; j < nops; j++)
13247 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13248 {
13249 /* We must find exactly one offset that is higher than the
13250 previous one by 4. */
13251 if (order[i] != order[i - 1])
13252 return false;
13253 order[i] = j;
13254 }
13255 if (order[i] == order[i - 1])
13256 return false;
13257 /* The register numbers must be ascending. */
13258 if (unsorted_regs != NULL
13259 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13260 return false;
13261 }
13262 return true;
13263 }
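/* Worked example (illustrative): with UNSORTED_OFFSETS = {8, 0, 4, 12} the
   caller presets ORDER[0] = 1 (the index of offset 0); the loop then finds
   offset 4 at index 2, offset 8 at index 0 and offset 12 at index 3,
   giving ORDER = {1, 2, 0, 3}.  If some step does not find exactly one
   offset that is larger by 4, or the corresponding register numbers are
   not strictly ascending, the function returns false.  */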
13264
13265 /* Used to determine in a peephole whether a sequence of load
13266 instructions can be changed into a load-multiple instruction.
13267 NOPS is the number of separate load instructions we are examining. The
13268 first NOPS entries in OPERANDS are the destination registers, the
13269 next NOPS entries are memory operands. If this function is
13270 successful, *BASE is set to the common base register of the memory
13271 accesses; *LOAD_OFFSET is set to the first memory location's offset
13272 from that base register.
13273 REGS is an array filled in with the destination register numbers.
13274 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13275 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13276 the sequence of registers in REGS matches the loads from ascending memory
13277 locations, and the function verifies that the register numbers are
13278 themselves ascending. If CHECK_REGS is false, the register numbers
13279 are stored in the order they are found in the operands. */
13280 static int
13281 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13282 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13283 {
13284 int unsorted_regs[MAX_LDM_STM_OPS];
13285 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13286 int order[MAX_LDM_STM_OPS];
13287 rtx base_reg_rtx = NULL;
13288 int base_reg = -1;
13289 int i, ldm_case;
13290
13291 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13292 easily extended if required. */
13293 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13294
13295 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13296
13297 /* Loop over the operands and check that the memory references are
13298 suitable (i.e. immediate offsets from the same base register). At
13299 the same time, extract the target register, and the memory
13300 offsets. */
13301 for (i = 0; i < nops; i++)
13302 {
13303 rtx reg;
13304 rtx offset;
13305
13306 /* Convert a subreg of a mem into the mem itself. */
13307 if (GET_CODE (operands[nops + i]) == SUBREG)
13308 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13309
13310 gcc_assert (MEM_P (operands[nops + i]));
13311
13312 /* Don't reorder volatile memory references; it doesn't seem worth
13313 looking for the case where the order is ok anyway. */
13314 if (MEM_VOLATILE_P (operands[nops + i]))
13315 return 0;
13316
13317 offset = const0_rtx;
13318
13319 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13320 || (GET_CODE (reg) == SUBREG
13321 && REG_P (reg = SUBREG_REG (reg))))
13322 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13323 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13324 || (GET_CODE (reg) == SUBREG
13325 && REG_P (reg = SUBREG_REG (reg))))
13326 && (CONST_INT_P (offset
13327 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13328 {
13329 if (i == 0)
13330 {
13331 base_reg = REGNO (reg);
13332 base_reg_rtx = reg;
13333 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13334 return 0;
13335 }
13336 else if (base_reg != (int) REGNO (reg))
13337 /* Not addressed from the same base register. */
13338 return 0;
13339
13340 unsorted_regs[i] = (REG_P (operands[i])
13341 ? REGNO (operands[i])
13342 : REGNO (SUBREG_REG (operands[i])));
13343
13344 /* If it isn't an integer register, or if it overwrites the
13345 base register but isn't the last insn in the list, then
13346 we can't do this. */
13347 if (unsorted_regs[i] < 0
13348 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13349 || unsorted_regs[i] > 14
13350 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13351 return 0;
13352
13353 /* Don't allow SP to be loaded unless it is also the base
13354 register. It guarantees that SP is reset correctly when
13355 an LDM instruction is interrupted. Otherwise, we might
13356 end up with a corrupt stack. */
13357 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13358 return 0;
13359
13360 unsorted_offsets[i] = INTVAL (offset);
13361 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13362 order[0] = i;
13363 }
13364 else
13365 /* Not a suitable memory address. */
13366 return 0;
13367 }
13368
13369 /* All the useful information has now been extracted from the
13370 operands into unsorted_regs and unsorted_offsets; additionally,
13371 order[0] has been set to the lowest offset in the list. Sort
13372 the offsets into order, verifying that they are adjacent, and
13373 check that the register numbers are ascending. */
13374 if (!compute_offset_order (nops, unsorted_offsets, order,
13375 check_regs ? unsorted_regs : NULL))
13376 return 0;
13377
13378 if (saved_order)
13379 memcpy (saved_order, order, sizeof order);
13380
13381 if (base)
13382 {
13383 *base = base_reg;
13384
13385 for (i = 0; i < nops; i++)
13386 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13387
13388 *load_offset = unsorted_offsets[order[0]];
13389 }
13390
13391 if (TARGET_THUMB1
13392 && !peep2_reg_dead_p (nops, base_reg_rtx))
13393 return 0;
13394
13395 if (unsorted_offsets[order[0]] == 0)
13396 ldm_case = 1; /* ldmia */
13397 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13398 ldm_case = 2; /* ldmib */
13399 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13400 ldm_case = 3; /* ldmda */
13401 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13402 ldm_case = 4; /* ldmdb */
13403 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13404 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13405 ldm_case = 5;
13406 else
13407 return 0;
13408
13409 if (!multiple_operation_profitable_p (false, nops,
13410 ldm_case == 5
13411 ? unsorted_offsets[order[0]] : 0))
13412 return 0;
13413
13414 return ldm_case;
13415 }
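/* Illustrative peephole input (assumed):

     ldr r4, [r6]
     ldr r5, [r6, #4]

   yields offsets {0, 4} from the common base r6 with ascending register
   numbers, so, when profitable for the tuning target, this returns
   ldm_case 1 and the pair can be rewritten as "ldmia r6, {r4, r5}".
   Offsets starting at 4, or ending at 0 or -4, select the ldmib, ldmda and
   ldmdb cases instead, and case 5 covers a base that first needs an
   explicit add of the common offset.  */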
13416
13417 /* Used to determine in a peephole whether a sequence of store instructions can
13418 be changed into a store-multiple instruction.
13419 NOPS is the number of separate store instructions we are examining.
13420 NOPS_TOTAL is the total number of instructions recognized by the peephole
13421 pattern.
13422 The first NOPS entries in OPERANDS are the source registers, the next
13423 NOPS entries are memory operands. If this function is successful, *BASE is
13424 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13425 to the first memory location's offset from that base register. REGS is an
13426 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13427 likewise filled with the corresponding rtx's.
13428 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13429 numbers to an ascending order of stores.
13430 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13431 from ascending memory locations, and the function verifies that the register
13432 numbers are themselves ascending. If CHECK_REGS is false, the register
13433 numbers are stored in the order they are found in the operands. */
13434 static int
13435 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13436 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13437 HOST_WIDE_INT *load_offset, bool check_regs)
13438 {
13439 int unsorted_regs[MAX_LDM_STM_OPS];
13440 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13441 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13442 int order[MAX_LDM_STM_OPS];
13443 int base_reg = -1;
13444 rtx base_reg_rtx = NULL;
13445 int i, stm_case;
13446
13447 /* Write back of base register is currently only supported for Thumb 1. */
13448 int base_writeback = TARGET_THUMB1;
13449
13450 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13451 easily extended if required. */
13452 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13453
13454 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13455
13456 /* Loop over the operands and check that the memory references are
13457 suitable (i.e. immediate offsets from the same base register). At
13458 the same time, extract the target register, and the memory
13459 offsets. */
13460 for (i = 0; i < nops; i++)
13461 {
13462 rtx reg;
13463 rtx offset;
13464
13465 /* Convert a subreg of a mem into the mem itself. */
13466 if (GET_CODE (operands[nops + i]) == SUBREG)
13467 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13468
13469 gcc_assert (MEM_P (operands[nops + i]));
13470
13471 /* Don't reorder volatile memory references; it doesn't seem worth
13472 looking for the case where the order is ok anyway. */
13473 if (MEM_VOLATILE_P (operands[nops + i]))
13474 return 0;
13475
13476 offset = const0_rtx;
13477
13478 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13479 || (GET_CODE (reg) == SUBREG
13480 && REG_P (reg = SUBREG_REG (reg))))
13481 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13482 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13483 || (GET_CODE (reg) == SUBREG
13484 && REG_P (reg = SUBREG_REG (reg))))
13485 && (CONST_INT_P (offset
13486 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13487 {
13488 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13489 ? operands[i] : SUBREG_REG (operands[i]));
13490 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13491
13492 if (i == 0)
13493 {
13494 base_reg = REGNO (reg);
13495 base_reg_rtx = reg;
13496 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13497 return 0;
13498 }
13499 else if (base_reg != (int) REGNO (reg))
13500 /* Not addressed from the same base register. */
13501 return 0;
13502
13503 /* If it isn't an integer register, then we can't do this. */
13504 if (unsorted_regs[i] < 0
13505 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13506 /* The effects are unpredictable if the base register is
13507 both updated and stored. */
13508 || (base_writeback && unsorted_regs[i] == base_reg)
13509 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13510 || unsorted_regs[i] > 14)
13511 return 0;
13512
13513 unsorted_offsets[i] = INTVAL (offset);
13514 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13515 order[0] = i;
13516 }
13517 else
13518 /* Not a suitable memory address. */
13519 return 0;
13520 }
13521
13522 /* All the useful information has now been extracted from the
13523 operands into unsorted_regs and unsorted_offsets; additionally,
13524 order[0] has been set to the lowest offset in the list. Sort
13525 the offsets into order, verifying that they are adjacent, and
13526 check that the register numbers are ascending. */
13527 if (!compute_offset_order (nops, unsorted_offsets, order,
13528 check_regs ? unsorted_regs : NULL))
13529 return 0;
13530
13531 if (saved_order)
13532 memcpy (saved_order, order, sizeof order);
13533
13534 if (base)
13535 {
13536 *base = base_reg;
13537
13538 for (i = 0; i < nops; i++)
13539 {
13540 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13541 if (reg_rtxs)
13542 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13543 }
13544
13545 *load_offset = unsorted_offsets[order[0]];
13546 }
13547
13548 if (TARGET_THUMB1
13549 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13550 return 0;
13551
13552 if (unsorted_offsets[order[0]] == 0)
13553 stm_case = 1; /* stmia */
13554 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13555 stm_case = 2; /* stmib */
13556 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13557 stm_case = 3; /* stmda */
13558 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13559 stm_case = 4; /* stmdb */
13560 else
13561 return 0;
13562
13563 if (!multiple_operation_profitable_p (false, nops, 0))
13564 return 0;
13565
13566 return stm_case;
13567 }
13568 \f
13569 /* Routines for use in generating RTL. */
13570
13571 /* Generate a load-multiple instruction. COUNT is the number of loads in
13572 the instruction; REGS and MEMS are arrays containing the operands.
13573 BASEREG is the base register to be used in addressing the memory operands.
13574 WBACK_OFFSET is nonzero if the instruction should update the base
13575 register. */
13576
13577 static rtx
13578 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13579 HOST_WIDE_INT wback_offset)
13580 {
13581 int i = 0, j;
13582 rtx result;
13583
13584 if (!multiple_operation_profitable_p (false, count, 0))
13585 {
13586 rtx seq;
13587
13588 start_sequence ();
13589
13590 for (i = 0; i < count; i++)
13591 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13592
13593 if (wback_offset != 0)
13594 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13595
13596 seq = get_insns ();
13597 end_sequence ();
13598
13599 return seq;
13600 }
13601
13602 result = gen_rtx_PARALLEL (VOIDmode,
13603 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13604 if (wback_offset != 0)
13605 {
13606 XVECEXP (result, 0, 0)
13607 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13608 i = 1;
13609 count++;
13610 }
13611
13612 for (j = 0; i < count; i++, j++)
13613 XVECEXP (result, 0, i)
13614 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13615
13616 return result;
13617 }
13618
13619 /* Generate a store-multiple instruction. COUNT is the number of stores in
13620 the instruction; REGS and MEMS are arrays containing the operands.
13621 BASEREG is the base register to be used in addressing the memory operands.
13622 WBACK_OFFSET is nonzero if the instruction should update the base
13623 register. */
13624
13625 static rtx
13626 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13627 HOST_WIDE_INT wback_offset)
13628 {
13629 int i = 0, j;
13630 rtx result;
13631
13632 if (GET_CODE (basereg) == PLUS)
13633 basereg = XEXP (basereg, 0);
13634
13635 if (!multiple_operation_profitable_p (false, count, 0))
13636 {
13637 rtx seq;
13638
13639 start_sequence ();
13640
13641 for (i = 0; i < count; i++)
13642 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13643
13644 if (wback_offset != 0)
13645 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13646
13647 seq = get_insns ();
13648 end_sequence ();
13649
13650 return seq;
13651 }
13652
13653 result = gen_rtx_PARALLEL (VOIDmode,
13654 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13655 if (wback_offset != 0)
13656 {
13657 XVECEXP (result, 0, 0)
13658 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13659 i = 1;
13660 count++;
13661 }
13662
13663 for (j = 0; i < count; i++, j++)
13664 XVECEXP (result, 0, i)
13665 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13666
13667 return result;
13668 }
13669
13670 /* Generate either a load-multiple or a store-multiple instruction. This
13671 function can be used in situations where we can start with a single MEM
13672 rtx and adjust its address upwards.
13673 COUNT is the number of operations in the instruction, not counting a
13674 possible update of the base register. REGS is an array containing the
13675 register operands.
13676 BASEREG is the base register to be used in addressing the memory operands,
13677 which are constructed from BASEMEM.
13678 WRITE_BACK specifies whether the generated instruction should include an
13679 update of the base register.
13680 OFFSETP is used to pass an offset to and from this function; this offset
13681 is not used when constructing the address (instead BASEMEM should have an
13682 appropriate offset in its address); it is used only for setting
13683 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13684
13685 static rtx
13686 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13687 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13688 {
13689 rtx mems[MAX_LDM_STM_OPS];
13690 HOST_WIDE_INT offset = *offsetp;
13691 int i;
13692
13693 gcc_assert (count <= MAX_LDM_STM_OPS);
13694
13695 if (GET_CODE (basereg) == PLUS)
13696 basereg = XEXP (basereg, 0);
13697
13698 for (i = 0; i < count; i++)
13699 {
13700 rtx addr = plus_constant (Pmode, basereg, i * 4);
13701 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13702 offset += 4;
13703 }
13704
13705 if (write_back)
13706 *offsetp = offset;
13707
13708 if (is_load)
13709 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13710 write_back ? 4 * count : 0);
13711 else
13712 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13713 write_back ? 4 * count : 0);
13714 }
13715
13716 rtx
13717 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13718 rtx basemem, HOST_WIDE_INT *offsetp)
13719 {
13720 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13721 offsetp);
13722 }
13723
13724 rtx
13725 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13726 rtx basemem, HOST_WIDE_INT *offsetp)
13727 {
13728 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13729 offsetp);
13730 }
13731
13732 /* Called from a peephole2 expander to turn a sequence of loads into an
13733 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13734 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13735 is true if we can reorder the registers because they are used commutatively
13736 subsequently.
13737 Returns true iff we could generate a new instruction. */
13738
13739 bool
13740 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13741 {
13742 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13743 rtx mems[MAX_LDM_STM_OPS];
13744 int i, j, base_reg;
13745 rtx base_reg_rtx;
13746 HOST_WIDE_INT offset;
13747 int write_back = FALSE;
13748 int ldm_case;
13749 rtx addr;
13750
13751 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13752 &base_reg, &offset, !sort_regs);
13753
13754 if (ldm_case == 0)
13755 return false;
13756
13757 if (sort_regs)
13758 for (i = 0; i < nops - 1; i++)
13759 for (j = i + 1; j < nops; j++)
13760 if (regs[i] > regs[j])
13761 {
13762 int t = regs[i];
13763 regs[i] = regs[j];
13764 regs[j] = t;
13765 }
13766 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13767
13768 if (TARGET_THUMB1)
13769 {
13770 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13771 gcc_assert (ldm_case == 1 || ldm_case == 5);
13772 write_back = TRUE;
13773 }
13774
13775 if (ldm_case == 5)
13776 {
13777 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13778 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13779 offset = 0;
13780 if (!TARGET_THUMB1)
13781 base_reg_rtx = newbase;
13782 }
13783
13784 for (i = 0; i < nops; i++)
13785 {
13786 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13787 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13788 SImode, addr, 0);
13789 }
13790 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13791 write_back ? offset + i * 4 : 0));
13792 return true;
13793 }
13794
13795 /* Called from a peephole2 expander to turn a sequence of stores into an
13796 STM instruction. OPERANDS are the operands found by the peephole matcher;
13797 NOPS indicates how many separate stores we are trying to combine.
13798 Returns true iff we could generate a new instruction. */
13799
13800 bool
13801 gen_stm_seq (rtx *operands, int nops)
13802 {
13803 int i;
13804 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13805 rtx mems[MAX_LDM_STM_OPS];
13806 int base_reg;
13807 rtx base_reg_rtx;
13808 HOST_WIDE_INT offset;
13809 int write_back = FALSE;
13810 int stm_case;
13811 rtx addr;
13812 bool base_reg_dies;
13813
13814 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13815 mem_order, &base_reg, &offset, true);
13816
13817 if (stm_case == 0)
13818 return false;
13819
13820 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13821
13822 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13823 if (TARGET_THUMB1)
13824 {
13825 gcc_assert (base_reg_dies);
13826 write_back = TRUE;
13827 }
13828
13829 if (stm_case == 5)
13830 {
13831 gcc_assert (base_reg_dies);
13832 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13833 offset = 0;
13834 }
13835
13836 addr = plus_constant (Pmode, base_reg_rtx, offset);
13837
13838 for (i = 0; i < nops; i++)
13839 {
13840 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13841 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13842 SImode, addr, 0);
13843 }
13844 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13845 write_back ? offset + i * 4 : 0));
13846 return true;
13847 }
13848
13849 /* Called from a peephole2 expander to turn a sequence of stores that are
13850 preceded by constant loads into an STM instruction. OPERANDS are the
13851 operands found by the peephole matcher; NOPS indicates how many
13852 separate stores we are trying to combine; there are 2 * NOPS
13853 instructions in the peephole.
13854 Returns true iff we could generate a new instruction. */
13855
13856 bool
13857 gen_const_stm_seq (rtx *operands, int nops)
13858 {
13859 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13860 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13861 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13862 rtx mems[MAX_LDM_STM_OPS];
13863 int base_reg;
13864 rtx base_reg_rtx;
13865 HOST_WIDE_INT offset;
13866 int write_back = FALSE;
13867 int stm_case;
13868 rtx addr;
13869 bool base_reg_dies;
13870 int i, j;
13871 HARD_REG_SET allocated;
13872
13873 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13874 mem_order, &base_reg, &offset, false);
13875
13876 if (stm_case == 0)
13877 return false;
13878
13879 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13880
13881 /* If the same register is used more than once, try to find a free
13882 register. */
13883 CLEAR_HARD_REG_SET (allocated);
13884 for (i = 0; i < nops; i++)
13885 {
13886 for (j = i + 1; j < nops; j++)
13887 if (regs[i] == regs[j])
13888 {
13889 rtx t = peep2_find_free_register (0, nops * 2,
13890 TARGET_THUMB1 ? "l" : "r",
13891 SImode, &allocated);
13892 if (t == NULL_RTX)
13893 return false;
13894 reg_rtxs[i] = t;
13895 regs[i] = REGNO (t);
13896 }
13897 }
13898
13899 /* Compute an ordering that maps the register numbers to an ascending
13900 sequence. */
13901 reg_order[0] = 0;
13902 for (i = 0; i < nops; i++)
13903 if (regs[i] < regs[reg_order[0]])
13904 reg_order[0] = i;
13905
13906 for (i = 1; i < nops; i++)
13907 {
13908 int this_order = reg_order[i - 1];
13909 for (j = 0; j < nops; j++)
13910 if (regs[j] > regs[reg_order[i - 1]]
13911 && (this_order == reg_order[i - 1]
13912 || regs[j] < regs[this_order]))
13913 this_order = j;
13914 reg_order[i] = this_order;
13915 }
13916
13917 /* Ensure that registers that must be live after the instruction end
13918 up with the correct value. */
13919 for (i = 0; i < nops; i++)
13920 {
13921 int this_order = reg_order[i];
13922 if ((this_order != mem_order[i]
13923 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13924 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13925 return false;
13926 }
13927
13928 /* Load the constants. */
13929 for (i = 0; i < nops; i++)
13930 {
13931 rtx op = operands[2 * nops + mem_order[i]];
13932 sorted_regs[i] = regs[reg_order[i]];
13933 emit_move_insn (reg_rtxs[reg_order[i]], op);
13934 }
13935
13936 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13937
13938 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13939 if (TARGET_THUMB1)
13940 {
13941 gcc_assert (base_reg_dies);
13942 write_back = TRUE;
13943 }
13944
13945 if (stm_case == 5)
13946 {
13947 gcc_assert (base_reg_dies);
13948 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13949 offset = 0;
13950 }
13951
13952 addr = plus_constant (Pmode, base_reg_rtx, offset);
13953
13954 for (i = 0; i < nops; i++)
13955 {
13956 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13957 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13958 SImode, addr, 0);
13959 }
13960 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13961 write_back ? offset + i * 4 : 0));
13962 return true;
13963 }
13964
13965 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13966 unaligned copies on processors which support unaligned semantics for those
13967 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13968 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13969 An interleave factor of 1 (the minimum) will perform no interleaving.
13970 Load/store multiple are used for aligned addresses where possible. */
13971
13972 static void
13973 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13974 HOST_WIDE_INT length,
13975 unsigned int interleave_factor)
13976 {
13977 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13978 int *regnos = XALLOCAVEC (int, interleave_factor);
13979 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13980 HOST_WIDE_INT i, j;
13981 HOST_WIDE_INT remaining = length, words;
13982 rtx halfword_tmp = NULL, byte_tmp = NULL;
13983 rtx dst, src;
13984 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13985 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13986 HOST_WIDE_INT srcoffset, dstoffset;
13987 HOST_WIDE_INT src_autoinc, dst_autoinc;
13988 rtx mem, addr;
13989
13990 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13991
13992 /* Use hard registers if we have aligned source or destination so we can use
13993 load/store multiple with contiguous registers. */
13994 if (dst_aligned || src_aligned)
13995 for (i = 0; i < interleave_factor; i++)
13996 regs[i] = gen_rtx_REG (SImode, i);
13997 else
13998 for (i = 0; i < interleave_factor; i++)
13999 regs[i] = gen_reg_rtx (SImode);
14000
14001 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14002 src = copy_addr_to_reg (XEXP (srcbase, 0));
14003
14004 srcoffset = dstoffset = 0;
14005
14006 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14007 For copying the last bytes we want to subtract this offset again. */
14008 src_autoinc = dst_autoinc = 0;
14009
14010 for (i = 0; i < interleave_factor; i++)
14011 regnos[i] = i;
14012
14013 /* Copy BLOCK_SIZE_BYTES chunks. */
14014
14015 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14016 {
14017 /* Load words. */
14018 if (src_aligned && interleave_factor > 1)
14019 {
14020 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14021 TRUE, srcbase, &srcoffset));
14022 src_autoinc += UNITS_PER_WORD * interleave_factor;
14023 }
14024 else
14025 {
14026 for (j = 0; j < interleave_factor; j++)
14027 {
14028 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14029 - src_autoinc));
14030 mem = adjust_automodify_address (srcbase, SImode, addr,
14031 srcoffset + j * UNITS_PER_WORD);
14032 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14033 }
14034 srcoffset += block_size_bytes;
14035 }
14036
14037 /* Store words. */
14038 if (dst_aligned && interleave_factor > 1)
14039 {
14040 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14041 TRUE, dstbase, &dstoffset));
14042 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14043 }
14044 else
14045 {
14046 for (j = 0; j < interleave_factor; j++)
14047 {
14048 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14049 - dst_autoinc));
14050 mem = adjust_automodify_address (dstbase, SImode, addr,
14051 dstoffset + j * UNITS_PER_WORD);
14052 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14053 }
14054 dstoffset += block_size_bytes;
14055 }
14056
14057 remaining -= block_size_bytes;
14058 }
14059
14060 /* Copy any whole words left (note these aren't interleaved with any
14061 subsequent halfword/byte load/stores in the interests of simplicity). */
14062
14063 words = remaining / UNITS_PER_WORD;
14064
14065 gcc_assert (words < interleave_factor);
14066
14067 if (src_aligned && words > 1)
14068 {
14069 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14070 &srcoffset));
14071 src_autoinc += UNITS_PER_WORD * words;
14072 }
14073 else
14074 {
14075 for (j = 0; j < words; j++)
14076 {
14077 addr = plus_constant (Pmode, src,
14078 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14079 mem = adjust_automodify_address (srcbase, SImode, addr,
14080 srcoffset + j * UNITS_PER_WORD);
14081 if (src_aligned)
14082 emit_move_insn (regs[j], mem);
14083 else
14084 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14085 }
14086 srcoffset += words * UNITS_PER_WORD;
14087 }
14088
14089 if (dst_aligned && words > 1)
14090 {
14091 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14092 &dstoffset));
14093 dst_autoinc += words * UNITS_PER_WORD;
14094 }
14095 else
14096 {
14097 for (j = 0; j < words; j++)
14098 {
14099 addr = plus_constant (Pmode, dst,
14100 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14101 mem = adjust_automodify_address (dstbase, SImode, addr,
14102 dstoffset + j * UNITS_PER_WORD);
14103 if (dst_aligned)
14104 emit_move_insn (mem, regs[j]);
14105 else
14106 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14107 }
14108 dstoffset += words * UNITS_PER_WORD;
14109 }
14110
14111 remaining -= words * UNITS_PER_WORD;
14112
14113 gcc_assert (remaining < 4);
14114
14115 /* Copy a halfword if necessary. */
14116
14117 if (remaining >= 2)
14118 {
14119 halfword_tmp = gen_reg_rtx (SImode);
14120
14121 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14122 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14123 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14124
14125 /* Either write out immediately, or delay until we've loaded the last
14126 byte, depending on interleave factor. */
14127 if (interleave_factor == 1)
14128 {
14129 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14130 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14131 emit_insn (gen_unaligned_storehi (mem,
14132 gen_lowpart (HImode, halfword_tmp)));
14133 halfword_tmp = NULL;
14134 dstoffset += 2;
14135 }
14136
14137 remaining -= 2;
14138 srcoffset += 2;
14139 }
14140
14141 gcc_assert (remaining < 2);
14142
14143 /* Copy last byte. */
14144
14145 if ((remaining & 1) != 0)
14146 {
14147 byte_tmp = gen_reg_rtx (SImode);
14148
14149 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14150 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14151 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14152
14153 if (interleave_factor == 1)
14154 {
14155 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14156 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14157 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14158 byte_tmp = NULL;
14159 dstoffset++;
14160 }
14161
14162 remaining--;
14163 srcoffset++;
14164 }
14165
14166 /* Store last halfword if we haven't done so already. */
14167
14168 if (halfword_tmp)
14169 {
14170 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14171 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14172 emit_insn (gen_unaligned_storehi (mem,
14173 gen_lowpart (HImode, halfword_tmp)));
14174 dstoffset += 2;
14175 }
14176
14177 /* Likewise for last byte. */
14178
14179 if (byte_tmp)
14180 {
14181 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14182 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14183 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14184 dstoffset++;
14185 }
14186
14187 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14188 }
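/* Worked example (illustrative): copying LENGTH == 23 bytes with
   INTERLEAVE_FACTOR == 2 proceeds as two 8-byte blocks (two word loads
   followed by two word stores each), then one leftover word, then a
   halfword and finally a single byte: 16 + 4 + 2 + 1 == 23.  The 8-byte
   blocks use ldm/stm on hard registers r0-r1 when the corresponding side
   is word-aligned; otherwise unaligned ldr/str on pseudo registers are
   emitted.  */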
14189
14190 /* From mips_adjust_block_mem:
14191
14192 Helper function for doing a loop-based block operation on memory
14193 reference MEM. Each iteration of the loop will operate on LENGTH
14194 bytes of MEM.
14195
14196 Create a new base register for use within the loop and point it to
14197 the start of MEM. Create a new memory reference that uses this
14198 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14199
14200 static void
14201 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14202 rtx *loop_mem)
14203 {
14204 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14205
14206 /* Although the new mem does not refer to a known location,
14207 it does keep up to LENGTH bytes of alignment. */
14208 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14209 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14210 }
14211
14212 /* From mips_block_move_loop:
14213
14214 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14215 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14216 the memory regions do not overlap. */
14217
14218 static void
14219 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14220 unsigned int interleave_factor,
14221 HOST_WIDE_INT bytes_per_iter)
14222 {
14223 rtx src_reg, dest_reg, final_src, test;
14224 HOST_WIDE_INT leftover;
14225
14226 leftover = length % bytes_per_iter;
14227 length -= leftover;
14228
14229 /* Create registers and memory references for use within the loop. */
14230 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14231 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14232
14233 /* Calculate the value that SRC_REG should have after the last iteration of
14234 the loop. */
14235 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14236 0, 0, OPTAB_WIDEN);
14237
14238 /* Emit the start of the loop. */
14239 rtx_code_label *label = gen_label_rtx ();
14240 emit_label (label);
14241
14242 /* Emit the loop body. */
14243 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14244 interleave_factor);
14245
14246 /* Move on to the next block. */
14247 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14248 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14249
14250 /* Emit the loop condition. */
14251 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14252 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14253
14254 /* Mop up any left-over bytes. */
14255 if (leftover)
14256 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14257 }
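
/* As a rough illustration (values assumed, not emitted literally), a call
   with LENGTH == 40 and BYTES_PER_ITER == 16 expands to something like:

       rS = &src;  rD = &dst;  rE = rS + 32;
     L:
       <16-byte straight copy from rS to rD>
       rS += 16;  rD += 16;
       if (rS != rE) goto L;
       <8-byte straight copy for the leftover bytes>  */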
14258
14259 /* Emit a block move when either the source or destination is unaligned (not
14260 aligned to a four-byte boundary). This may need further tuning depending on
14261 core type, optimize_size setting, etc. */
14262
14263 static int
14264 arm_movmemqi_unaligned (rtx *operands)
14265 {
14266 HOST_WIDE_INT length = INTVAL (operands[2]);
14267
14268 if (optimize_size)
14269 {
14270 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14271 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14272 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14273 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14274 or dst_aligned though: allow more interleaving in those cases since the
14275 resulting code can be smaller. */
14276 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14277 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14278
14279 if (length > 12)
14280 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14281 interleave_factor, bytes_per_iter);
14282 else
14283 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14284 interleave_factor);
14285 }
14286 else
14287 {
14288 /* Note that the loop created by arm_block_move_unaligned_loop may be
14289 subject to loop unrolling, which makes tuning this condition a little
14290 redundant. */
14291 if (length > 32)
14292 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14293 else
14294 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14295 }
14296
14297 return 1;
14298 }
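
/* Illustrative examples of the thresholds above (sizes assumed): at -Os a
   24-byte copy with one side word-aligned takes the loop with 8 bytes per
   iteration (three iterations, no leftover), whereas when optimizing for
   speed the same 24-byte copy stays below the 32-byte cutoff and is emitted
   as straight-line code with interleave factor 4.  */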
14299
14300 int
14301 arm_gen_movmemqi (rtx *operands)
14302 {
14303 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14304 HOST_WIDE_INT srcoffset, dstoffset;
14305 rtx src, dst, srcbase, dstbase;
14306 rtx part_bytes_reg = NULL;
14307 rtx mem;
14308
14309 if (!CONST_INT_P (operands[2])
14310 || !CONST_INT_P (operands[3])
14311 || INTVAL (operands[2]) > 64)
14312 return 0;
14313
14314 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14315 return arm_movmemqi_unaligned (operands);
14316
14317 if (INTVAL (operands[3]) & 3)
14318 return 0;
14319
14320 dstbase = operands[0];
14321 srcbase = operands[1];
14322
14323 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14324 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14325
14326 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14327 out_words_to_go = INTVAL (operands[2]) / 4;
14328 last_bytes = INTVAL (operands[2]) & 3;
14329 dstoffset = srcoffset = 0;
14330
14331 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14332 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14333
14334 while (in_words_to_go >= 2)
14335 {
14336 if (in_words_to_go > 4)
14337 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14338 TRUE, srcbase, &srcoffset));
14339 else
14340 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14341 src, FALSE, srcbase,
14342 &srcoffset));
14343
14344 if (out_words_to_go)
14345 {
14346 if (out_words_to_go > 4)
14347 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14348 TRUE, dstbase, &dstoffset));
14349 else if (out_words_to_go != 1)
14350 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14351 out_words_to_go, dst,
14352 (last_bytes == 0
14353 ? FALSE : TRUE),
14354 dstbase, &dstoffset));
14355 else
14356 {
14357 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14358 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14359 if (last_bytes != 0)
14360 {
14361 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14362 dstoffset += 4;
14363 }
14364 }
14365 }
14366
14367 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14368 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14369 }
14370
14371 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14372 if (out_words_to_go)
14373 {
14374 rtx sreg;
14375
14376 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14377 sreg = copy_to_reg (mem);
14378
14379 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14380 emit_move_insn (mem, sreg);
14381 in_words_to_go--;
14382
14383 gcc_assert (!in_words_to_go); /* Sanity check */
14384 }
14385
14386 if (in_words_to_go)
14387 {
14388 gcc_assert (in_words_to_go > 0);
14389
14390 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14391 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14392 }
14393
14394 gcc_assert (!last_bytes || part_bytes_reg);
14395
14396 if (BYTES_BIG_ENDIAN && last_bytes)
14397 {
14398 rtx tmp = gen_reg_rtx (SImode);
14399
14400 /* The bytes we want are in the top end of the word. */
14401 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14402 GEN_INT (8 * (4 - last_bytes))));
14403 part_bytes_reg = tmp;
14404
14405 while (last_bytes)
14406 {
14407 mem = adjust_automodify_address (dstbase, QImode,
14408 plus_constant (Pmode, dst,
14409 last_bytes - 1),
14410 dstoffset + last_bytes - 1);
14411 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14412
14413 if (--last_bytes)
14414 {
14415 tmp = gen_reg_rtx (SImode);
14416 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14417 part_bytes_reg = tmp;
14418 }
14419 }
14420
14421 }
14422 else
14423 {
14424 if (last_bytes > 1)
14425 {
14426 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14427 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14428 last_bytes -= 2;
14429 if (last_bytes)
14430 {
14431 rtx tmp = gen_reg_rtx (SImode);
14432 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14433 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14434 part_bytes_reg = tmp;
14435 dstoffset += 2;
14436 }
14437 }
14438
14439 if (last_bytes)
14440 {
14441 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14442 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14443 }
14444 }
14445
14446 return 1;
14447 }
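
/* As an illustration (not a guaranteed sequence), a 14-byte copy with both
   addresses word-aligned expands roughly to, on little-endian:

       ldmia   rS, {r0, r1, r2, r3}   @ the last word is loaded whole
       stmia   rD!, {r0, r1, r2}
       strh    r3, [rD]               @ only its low halfword is stored

   i.e. three whole words plus a trailing halfword.  */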
14448
14449 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14450 by mode size. */
14451 inline static rtx
14452 next_consecutive_mem (rtx mem)
14453 {
14454 machine_mode mode = GET_MODE (mem);
14455 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14456 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14457
14458 return adjust_automodify_address (mem, mode, addr, offset);
14459 }
14460
14461 /* Copy using LDRD/STRD instructions whenever possible.
14462 Returns true upon success. */
14463 bool
14464 gen_movmem_ldrd_strd (rtx *operands)
14465 {
14466 unsigned HOST_WIDE_INT len;
14467 HOST_WIDE_INT align;
14468 rtx src, dst, base;
14469 rtx reg0;
14470 bool src_aligned, dst_aligned;
14471 bool src_volatile, dst_volatile;
14472
14473 gcc_assert (CONST_INT_P (operands[2]));
14474 gcc_assert (CONST_INT_P (operands[3]));
14475
14476 len = UINTVAL (operands[2]);
14477 if (len > 64)
14478 return false;
14479
14480 /* Maximum alignment we can assume for both src and dst buffers. */
14481 align = INTVAL (operands[3]);
14482
14483 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14484 return false;
14485
14486 /* Place src and dst addresses in registers
14487 and update the corresponding mem rtx. */
14488 dst = operands[0];
14489 dst_volatile = MEM_VOLATILE_P (dst);
14490 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14491 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14492 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14493
14494 src = operands[1];
14495 src_volatile = MEM_VOLATILE_P (src);
14496 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14497 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14498 src = adjust_automodify_address (src, VOIDmode, base, 0);
14499
14500 if (!unaligned_access && !(src_aligned && dst_aligned))
14501 return false;
14502
14503 if (src_volatile || dst_volatile)
14504 return false;
14505
14506 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14507 if (!(dst_aligned || src_aligned))
14508 return arm_gen_movmemqi (operands);
14509
14510 /* If either the src or dst is unaligned we'll be accessing it as pairs
14511 of unaligned SImode accesses. Otherwise we can generate DImode
14512 ldrd/strd instructions. */
14513 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14514 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14515
14516 while (len >= 8)
14517 {
14518 len -= 8;
14519 reg0 = gen_reg_rtx (DImode);
14520 rtx low_reg = NULL_RTX;
14521 rtx hi_reg = NULL_RTX;
14522
14523 if (!src_aligned || !dst_aligned)
14524 {
14525 low_reg = gen_lowpart (SImode, reg0);
14526 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14527 }
14528 if (src_aligned)
14529 emit_move_insn (reg0, src);
14530 else
14531 {
14532 emit_insn (gen_unaligned_loadsi (low_reg, src));
14533 src = next_consecutive_mem (src);
14534 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14535 }
14536
14537 if (dst_aligned)
14538 emit_move_insn (dst, reg0);
14539 else
14540 {
14541 emit_insn (gen_unaligned_storesi (dst, low_reg));
14542 dst = next_consecutive_mem (dst);
14543 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14544 }
14545
14546 src = next_consecutive_mem (src);
14547 dst = next_consecutive_mem (dst);
14548 }
14549
14550 gcc_assert (len < 8);
14551 if (len >= 4)
14552 {
14553 /* More than a word but less than a double-word to copy. Copy a word. */
14554 reg0 = gen_reg_rtx (SImode);
14555 src = adjust_address (src, SImode, 0);
14556 dst = adjust_address (dst, SImode, 0);
14557 if (src_aligned)
14558 emit_move_insn (reg0, src);
14559 else
14560 emit_insn (gen_unaligned_loadsi (reg0, src));
14561
14562 if (dst_aligned)
14563 emit_move_insn (dst, reg0);
14564 else
14565 emit_insn (gen_unaligned_storesi (dst, reg0));
14566
14567 src = next_consecutive_mem (src);
14568 dst = next_consecutive_mem (dst);
14569 len -= 4;
14570 }
14571
14572 if (len == 0)
14573 return true;
14574
14575 /* Copy the remaining bytes. */
14576 if (len >= 2)
14577 {
14578 dst = adjust_address (dst, HImode, 0);
14579 src = adjust_address (src, HImode, 0);
14580 reg0 = gen_reg_rtx (SImode);
14581 if (src_aligned)
14582 emit_insn (gen_zero_extendhisi2 (reg0, src));
14583 else
14584 emit_insn (gen_unaligned_loadhiu (reg0, src));
14585
14586 if (dst_aligned)
14587 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14588 else
14589 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14590
14591 src = next_consecutive_mem (src);
14592 dst = next_consecutive_mem (dst);
14593 if (len == 2)
14594 return true;
14595 }
14596
14597 dst = adjust_address (dst, QImode, 0);
14598 src = adjust_address (src, QImode, 0);
14599 reg0 = gen_reg_rtx (QImode);
14600 emit_move_insn (reg0, src);
14601 emit_move_insn (dst, reg0);
14602 return true;
14603 }
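
/* Example of the expansion above (operands assumed): copying 17 bytes when
   both source and destination are word-aligned typically yields two
   LDRD/STRD pairs for the first 16 bytes followed by a single LDRB/STRB for
   the last byte.  If only one side is aligned, the aligned side still uses
   DImode moves (typically LDRD or STRD) while the unaligned side is
   accessed as pairs of unaligned SImode loads or stores.  */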
14604
14605 /* Select a dominance comparison mode if possible for a test of the general
14606 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14607 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14608 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14609 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14610 In all cases OP will be either EQ or NE, but we don't need to know which
14611 here. If we are unable to support a dominance comparison we return
14612 CC mode. This will then fail to match for the RTL expressions that
14613 generate this call. */
14614 machine_mode
14615 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14616 {
14617 enum rtx_code cond1, cond2;
14618 int swapped = 0;
14619
14620 /* Currently we will probably get the wrong result if the individual
14621 comparisons are not simple. This also ensures that it is safe to
14622 reverse a comparison if necessary. */
14623 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14624 != CCmode)
14625 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14626 != CCmode))
14627 return CCmode;
14628
14629 /* The if_then_else variant of this tests the second condition if the
14630 first passes, but is true if the first fails. Reverse the first
14631 condition to get a true "inclusive-or" expression. */
14632 if (cond_or == DOM_CC_NX_OR_Y)
14633 cond1 = reverse_condition (cond1);
14634
14635 /* If the comparisons are not equal, and one doesn't dominate the other,
14636 then we can't do this. */
14637 if (cond1 != cond2
14638 && !comparison_dominates_p (cond1, cond2)
14639 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14640 return CCmode;
14641
14642 if (swapped)
14643 std::swap (cond1, cond2);
14644
14645 switch (cond1)
14646 {
14647 case EQ:
14648 if (cond_or == DOM_CC_X_AND_Y)
14649 return CC_DEQmode;
14650
14651 switch (cond2)
14652 {
14653 case EQ: return CC_DEQmode;
14654 case LE: return CC_DLEmode;
14655 case LEU: return CC_DLEUmode;
14656 case GE: return CC_DGEmode;
14657 case GEU: return CC_DGEUmode;
14658 default: gcc_unreachable ();
14659 }
14660
14661 case LT:
14662 if (cond_or == DOM_CC_X_AND_Y)
14663 return CC_DLTmode;
14664
14665 switch (cond2)
14666 {
14667 case LT:
14668 return CC_DLTmode;
14669 case LE:
14670 return CC_DLEmode;
14671 case NE:
14672 return CC_DNEmode;
14673 default:
14674 gcc_unreachable ();
14675 }
14676
14677 case GT:
14678 if (cond_or == DOM_CC_X_AND_Y)
14679 return CC_DGTmode;
14680
14681 switch (cond2)
14682 {
14683 case GT:
14684 return CC_DGTmode;
14685 case GE:
14686 return CC_DGEmode;
14687 case NE:
14688 return CC_DNEmode;
14689 default:
14690 gcc_unreachable ();
14691 }
14692
14693 case LTU:
14694 if (cond_or == DOM_CC_X_AND_Y)
14695 return CC_DLTUmode;
14696
14697 switch (cond2)
14698 {
14699 case LTU:
14700 return CC_DLTUmode;
14701 case LEU:
14702 return CC_DLEUmode;
14703 case NE:
14704 return CC_DNEmode;
14705 default:
14706 gcc_unreachable ();
14707 }
14708
14709 case GTU:
14710 if (cond_or == DOM_CC_X_AND_Y)
14711 return CC_DGTUmode;
14712
14713 switch (cond2)
14714 {
14715 case GTU:
14716 return CC_DGTUmode;
14717 case GEU:
14718 return CC_DGEUmode;
14719 case NE:
14720 return CC_DNEmode;
14721 default:
14722 gcc_unreachable ();
14723 }
14724
14725 /* The remaining cases only occur when both comparisons are the
14726 same. */
14727 case NE:
14728 gcc_assert (cond1 == cond2);
14729 return CC_DNEmode;
14730
14731 case LE:
14732 gcc_assert (cond1 == cond2);
14733 return CC_DLEmode;
14734
14735 case GE:
14736 gcc_assert (cond1 == cond2);
14737 return CC_DGEmode;
14738
14739 case LEU:
14740 gcc_assert (cond1 == cond2);
14741 return CC_DLEUmode;
14742
14743 case GEU:
14744 gcc_assert (cond1 == cond2);
14745 return CC_DGEUmode;
14746
14747 default:
14748 gcc_unreachable ();
14749 }
14750 }
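
/* Example (for illustration only): for a test such as
   (ne (ior (gt a b) (ge c d)) (const_int 0)) we get cond1 == GT and
   cond2 == GE with COND_OR == DOM_CC_X_OR_Y; GT dominates GE, so the
   function returns CC_DGEmode.  If neither condition dominated the other,
   say (ltu ...) combined with (gt ...), CCmode would be returned and the
   calling pattern would simply fail to match.  */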
14751
14752 machine_mode
14753 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14754 {
14755 /* All floating point compares return CCFP if it is an equality
14756 comparison, and CCFPE otherwise. */
14757 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14758 {
14759 switch (op)
14760 {
14761 case EQ:
14762 case NE:
14763 case UNORDERED:
14764 case ORDERED:
14765 case UNLT:
14766 case UNLE:
14767 case UNGT:
14768 case UNGE:
14769 case UNEQ:
14770 case LTGT:
14771 return CCFPmode;
14772
14773 case LT:
14774 case LE:
14775 case GT:
14776 case GE:
14777 return CCFPEmode;
14778
14779 default:
14780 gcc_unreachable ();
14781 }
14782 }
14783
14784 /* A compare with a shifted operand. Because of canonicalization, the
14785 comparison will have to be swapped when we emit the assembler. */
14786 if (GET_MODE (y) == SImode
14787 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14788 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14789 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14790 || GET_CODE (x) == ROTATERT))
14791 return CC_SWPmode;
14792
14793 /* This operation is performed swapped, but since we only rely on the Z
14794 flag we don't need an additional mode. */
14795 if (GET_MODE (y) == SImode
14796 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14797 && GET_CODE (x) == NEG
14798 && (op == EQ || op == NE))
14799 return CC_Zmode;
14800
14801 /* This is a special case that is used by combine to allow a
14802 comparison of a shifted byte load to be split into a zero-extend
14803 followed by a comparison of the shifted integer (only valid for
14804 equalities and unsigned inequalities). */
14805 if (GET_MODE (x) == SImode
14806 && GET_CODE (x) == ASHIFT
14807 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14808 && GET_CODE (XEXP (x, 0)) == SUBREG
14809 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14810 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14811 && (op == EQ || op == NE
14812 || op == GEU || op == GTU || op == LTU || op == LEU)
14813 && CONST_INT_P (y))
14814 return CC_Zmode;
14815
14816 /* A construct for a conditional compare, if the false arm contains
14817 0, then both conditions must be true, otherwise either condition
14818 must be true. Not all conditions are possible, so CCmode is
14819 returned if it can't be done. */
14820 if (GET_CODE (x) == IF_THEN_ELSE
14821 && (XEXP (x, 2) == const0_rtx
14822 || XEXP (x, 2) == const1_rtx)
14823 && COMPARISON_P (XEXP (x, 0))
14824 && COMPARISON_P (XEXP (x, 1)))
14825 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14826 INTVAL (XEXP (x, 2)));
14827
14828 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14829 if (GET_CODE (x) == AND
14830 && (op == EQ || op == NE)
14831 && COMPARISON_P (XEXP (x, 0))
14832 && COMPARISON_P (XEXP (x, 1)))
14833 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14834 DOM_CC_X_AND_Y);
14835
14836 if (GET_CODE (x) == IOR
14837 && (op == EQ || op == NE)
14838 && COMPARISON_P (XEXP (x, 0))
14839 && COMPARISON_P (XEXP (x, 1)))
14840 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14841 DOM_CC_X_OR_Y);
14842
14843 /* An operation (on Thumb) where we want to test for a single bit.
14844 This is done by shifting that bit up into the top bit of a
14845 scratch register; we can then branch on the sign bit. */
14846 if (TARGET_THUMB1
14847 && GET_MODE (x) == SImode
14848 && (op == EQ || op == NE)
14849 && GET_CODE (x) == ZERO_EXTRACT
14850 && XEXP (x, 1) == const1_rtx)
14851 return CC_Nmode;
14852
14853 /* An operation that sets the condition codes as a side-effect, the
14854 V flag is not set correctly, so we can only use comparisons where
14855 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14856 instead.) */
14857 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14858 if (GET_MODE (x) == SImode
14859 && y == const0_rtx
14860 && (op == EQ || op == NE || op == LT || op == GE)
14861 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14862 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14863 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14864 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14865 || GET_CODE (x) == LSHIFTRT
14866 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14867 || GET_CODE (x) == ROTATERT
14868 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14869 return CC_NOOVmode;
14870
14871 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14872 return CC_Zmode;
14873
14874 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14875 && GET_CODE (x) == PLUS
14876 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14877 return CC_Cmode;
14878
14879 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14880 {
14881 switch (op)
14882 {
14883 case EQ:
14884 case NE:
14885 /* A DImode comparison against zero can be implemented by
14886 or'ing the two halves together. */
14887 if (y == const0_rtx)
14888 return CC_Zmode;
14889
14890 /* We can do an equality test in three Thumb instructions. */
14891 if (!TARGET_32BIT)
14892 return CC_Zmode;
14893
14894 /* FALLTHROUGH */
14895
14896 case LTU:
14897 case LEU:
14898 case GTU:
14899 case GEU:
14900 /* DImode unsigned comparisons can be implemented by cmp +
14901 cmpeq without a scratch register. Not worth doing in
14902 Thumb-2. */
14903 if (TARGET_32BIT)
14904 return CC_CZmode;
14905
14906 /* FALLTHROUGH */
14907
14908 case LT:
14909 case LE:
14910 case GT:
14911 case GE:
14912 /* DImode signed and unsigned comparisons can be implemented
14913 by cmp + sbcs with a scratch register, but that does not
14914 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14915 gcc_assert (op != EQ && op != NE);
14916 return CC_NCVmode;
14917
14918 default:
14919 gcc_unreachable ();
14920 }
14921 }
14922
14923 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14924 return GET_MODE (x);
14925
14926 return CCmode;
14927 }
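
/* A few illustrative mappings (operand names assumed):
     op LTU, x (plus r0 r1), y r1              -> CC_Cmode   (carry test)
     op EQ,  x (neg r0), y r1                  -> CC_Zmode   (only Z used)
     op GE,  x (ashift r0 (const_int 2)), y r1 -> CC_SWPmode (emitted swapped)
     op EQ,  x (reg:DI d0), y (const_int 0)    -> CC_Zmode   (orr the halves)  */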
14928
14929 /* X and Y are two things to compare using CODE. Emit the compare insn and
14930 return the rtx for the CC register in the proper mode. SCRATCH is an
14931 SImode register that may be used as a scratch for DImode comparisons. */
14932 rtx
14933 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14934 {
14935 machine_mode mode;
14936 rtx cc_reg;
14937 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14938
14939 /* We might have X as a constant, Y as a register because of the predicates
14940 used for cmpdi. If so, force X to a register here. */
14941 if (dimode_comparison && !REG_P (x))
14942 x = force_reg (DImode, x);
14943
14944 mode = SELECT_CC_MODE (code, x, y);
14945 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14946
14947 if (dimode_comparison
14948 && mode != CC_CZmode)
14949 {
14950 rtx clobber, set;
14951
14952 /* To compare two non-zero values for equality, XOR them and
14953 then compare against zero. Not used for ARM mode; there
14954 CC_CZmode is cheaper. */
14955 if (mode == CC_Zmode && y != const0_rtx)
14956 {
14957 gcc_assert (!reload_completed);
14958 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14959 y = const0_rtx;
14960 }
14961
14962 /* A scratch register is required. */
14963 if (reload_completed)
14964 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14965 else
14966 scratch = gen_rtx_SCRATCH (SImode);
14967
14968 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14969 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14970 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14971 }
14972 else
14973 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14974
14975 return cc_reg;
14976 }
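
/* For illustration: a DImode signed comparison selects CC_NCVmode above and
   is therefore emitted as a PARALLEL of the COMPARE and a SImode scratch
   CLOBBER (the later cmp/sbcs split needs the scratch), whereas a DImode
   unsigned comparison on a 32-bit target selects CC_CZmode and is emitted
   as a plain COMPARE with no scratch.  */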
14977
14978 /* Generate a sequence of insns that will generate the correct return
14979 address mask depending on the physical architecture that the program
14980 is running on. */
14981 rtx
14982 arm_gen_return_addr_mask (void)
14983 {
14984 rtx reg = gen_reg_rtx (Pmode);
14985
14986 emit_insn (gen_return_addr_mask (reg));
14987 return reg;
14988 }
14989
14990 void
14991 arm_reload_in_hi (rtx *operands)
14992 {
14993 rtx ref = operands[1];
14994 rtx base, scratch;
14995 HOST_WIDE_INT offset = 0;
14996
14997 if (GET_CODE (ref) == SUBREG)
14998 {
14999 offset = SUBREG_BYTE (ref);
15000 ref = SUBREG_REG (ref);
15001 }
15002
15003 if (REG_P (ref))
15004 {
15005 /* We have a pseudo which has been spilt onto the stack; there
15006 are two cases here: the first where there is a simple
15007 stack-slot replacement and a second where the stack-slot is
15008 out of range, or is used as a subreg. */
15009 if (reg_equiv_mem (REGNO (ref)))
15010 {
15011 ref = reg_equiv_mem (REGNO (ref));
15012 base = find_replacement (&XEXP (ref, 0));
15013 }
15014 else
15015 /* The slot is out of range, or was dressed up in a SUBREG. */
15016 base = reg_equiv_address (REGNO (ref));
15017
15018 /* PR 62554: If there is no equivalent memory location then just move
15019 the value as an SImode register move. This happens when the target
15020 architecture variant does not have an HImode register move. */
15021 if (base == NULL)
15022 {
15023 gcc_assert (REG_P (operands[0]));
15024 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15025 gen_rtx_SUBREG (SImode, ref, 0)));
15026 return;
15027 }
15028 }
15029 else
15030 base = find_replacement (&XEXP (ref, 0));
15031
15032 /* Handle the case where the address is too complex to be offset by 1. */
15033 if (GET_CODE (base) == MINUS
15034 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15035 {
15036 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15037
15038 emit_set_insn (base_plus, base);
15039 base = base_plus;
15040 }
15041 else if (GET_CODE (base) == PLUS)
15042 {
15043 /* The addend must be CONST_INT, or we would have dealt with it above. */
15044 HOST_WIDE_INT hi, lo;
15045
15046 offset += INTVAL (XEXP (base, 1));
15047 base = XEXP (base, 0);
15048
15049 /* Rework the address into a legal sequence of insns. */
15050 /* Valid range for lo is -4095 -> 4095 */
15051 lo = (offset >= 0
15052 ? (offset & 0xfff)
15053 : -((-offset) & 0xfff));
15054
15055 /* Corner case, if lo is the max offset then we would be out of range
15056 once we have added the additional 1 below, so bump the msb into the
15057 pre-loading insn(s). */
15058 if (lo == 4095)
15059 lo &= 0x7ff;
15060
15061 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15062 ^ (HOST_WIDE_INT) 0x80000000)
15063 - (HOST_WIDE_INT) 0x80000000);
15064
15065 gcc_assert (hi + lo == offset);
15066
15067 if (hi != 0)
15068 {
15069 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15070
15071 /* Get the base address; addsi3 knows how to handle constants
15072 that require more than one insn. */
15073 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15074 base = base_plus;
15075 offset = lo;
15076 }
15077 }
15078
15079 /* Operands[2] may overlap operands[0] (though it won't overlap
15080 operands[1]), that's why we asked for a DImode reg -- so we can
15081 use the bit that does not overlap. */
15082 if (REGNO (operands[2]) == REGNO (operands[0]))
15083 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15084 else
15085 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15086
15087 emit_insn (gen_zero_extendqisi2 (scratch,
15088 gen_rtx_MEM (QImode,
15089 plus_constant (Pmode, base,
15090 offset))));
15091 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15092 gen_rtx_MEM (QImode,
15093 plus_constant (Pmode, base,
15094 offset + 1))));
15095 if (!BYTES_BIG_ENDIAN)
15096 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15097 gen_rtx_IOR (SImode,
15098 gen_rtx_ASHIFT
15099 (SImode,
15100 gen_rtx_SUBREG (SImode, operands[0], 0),
15101 GEN_INT (8)),
15102 scratch));
15103 else
15104 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15105 gen_rtx_IOR (SImode,
15106 gen_rtx_ASHIFT (SImode, scratch,
15107 GEN_INT (8)),
15108 gen_rtx_SUBREG (SImode, operands[0], 0)));
15109 }
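
/* Worked examples of the offset legalization above (repeated in
   arm_reload_out_hi below), purely for illustration:
     offset 0x1234 -> lo = 0x234, hi = 0x1000 (hi added to the base first)
     offset 4095   -> lo = 0x7ff, hi = 0x800  (lo trimmed so lo + 1 stays
                                               inside the +/-4095 range)
     offset -5     -> lo = -5,    hi = 0      (no extra add needed)  */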
15110
15111 /* Handle storing a half-word to memory during reload by synthesizing as two
15112 byte stores. Take care not to clobber the input values until after we
15113 have moved them somewhere safe. This code assumes that if the DImode
15114 scratch in operands[2] overlaps either the input value or output address
15115 in some way, then that value must die in this insn (we absolutely need
15116 two scratch registers for some corner cases). */
15117 void
15118 arm_reload_out_hi (rtx *operands)
15119 {
15120 rtx ref = operands[0];
15121 rtx outval = operands[1];
15122 rtx base, scratch;
15123 HOST_WIDE_INT offset = 0;
15124
15125 if (GET_CODE (ref) == SUBREG)
15126 {
15127 offset = SUBREG_BYTE (ref);
15128 ref = SUBREG_REG (ref);
15129 }
15130
15131 if (REG_P (ref))
15132 {
15133 /* We have a pseudo which has been spilt onto the stack; there
15134 are two cases here: the first where there is a simple
15135 stack-slot replacement and a second where the stack-slot is
15136 out of range, or is used as a subreg. */
15137 if (reg_equiv_mem (REGNO (ref)))
15138 {
15139 ref = reg_equiv_mem (REGNO (ref));
15140 base = find_replacement (&XEXP (ref, 0));
15141 }
15142 else
15143 /* The slot is out of range, or was dressed up in a SUBREG. */
15144 base = reg_equiv_address (REGNO (ref));
15145
15146 /* PR 62254: If there is no equivalent memory location then just move
15147 the value as an SImode register move. This happens when the target
15148 architecture variant does not have an HImode register move. */
15149 if (base == NULL)
15150 {
15151 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15152
15153 if (REG_P (outval))
15154 {
15155 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15156 gen_rtx_SUBREG (SImode, outval, 0)));
15157 }
15158 else /* SUBREG_P (outval) */
15159 {
15160 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15161 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15162 SUBREG_REG (outval)));
15163 else
15164 /* FIXME: Handle other cases ? */
15165 gcc_unreachable ();
15166 }
15167 return;
15168 }
15169 }
15170 else
15171 base = find_replacement (&XEXP (ref, 0));
15172
15173 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15174
15175 /* Handle the case where the address is too complex to be offset by 1. */
15176 if (GET_CODE (base) == MINUS
15177 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15178 {
15179 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15180
15181 /* Be careful not to destroy OUTVAL. */
15182 if (reg_overlap_mentioned_p (base_plus, outval))
15183 {
15184 /* Updating base_plus might destroy outval, see if we can
15185 swap the scratch and base_plus. */
15186 if (!reg_overlap_mentioned_p (scratch, outval))
15187 std::swap (scratch, base_plus);
15188 else
15189 {
15190 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15191
15192 /* Be conservative and copy OUTVAL into the scratch now,
15193 this should only be necessary if outval is a subreg
15194 of something larger than a word. */
15195 /* XXX Might this clobber base? I can't see how it can,
15196 since scratch is known to overlap with OUTVAL, and
15197 must be wider than a word. */
15198 emit_insn (gen_movhi (scratch_hi, outval));
15199 outval = scratch_hi;
15200 }
15201 }
15202
15203 emit_set_insn (base_plus, base);
15204 base = base_plus;
15205 }
15206 else if (GET_CODE (base) == PLUS)
15207 {
15208 /* The addend must be CONST_INT, or we would have dealt with it above. */
15209 HOST_WIDE_INT hi, lo;
15210
15211 offset += INTVAL (XEXP (base, 1));
15212 base = XEXP (base, 0);
15213
15214 /* Rework the address into a legal sequence of insns. */
15215 /* Valid range for lo is -4095 -> 4095 */
15216 lo = (offset >= 0
15217 ? (offset & 0xfff)
15218 : -((-offset) & 0xfff));
15219
15220 /* Corner case, if lo is the max offset then we would be out of range
15221 once we have added the additional 1 below, so bump the msb into the
15222 pre-loading insn(s). */
15223 if (lo == 4095)
15224 lo &= 0x7ff;
15225
15226 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15227 ^ (HOST_WIDE_INT) 0x80000000)
15228 - (HOST_WIDE_INT) 0x80000000);
15229
15230 gcc_assert (hi + lo == offset);
15231
15232 if (hi != 0)
15233 {
15234 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15235
15236 /* Be careful not to destroy OUTVAL. */
15237 if (reg_overlap_mentioned_p (base_plus, outval))
15238 {
15239 /* Updating base_plus might destroy outval, see if we
15240 can swap the scratch and base_plus. */
15241 if (!reg_overlap_mentioned_p (scratch, outval))
15242 std::swap (scratch, base_plus);
15243 else
15244 {
15245 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15246
15247 /* Be conservative and copy outval into scratch now,
15248 this should only be necessary if outval is a
15249 subreg of something larger than a word. */
15250 /* XXX Might this clobber base? I can't see how it
15251 can, since scratch is known to overlap with
15252 outval. */
15253 emit_insn (gen_movhi (scratch_hi, outval));
15254 outval = scratch_hi;
15255 }
15256 }
15257
15258 /* Get the base address; addsi3 knows how to handle constants
15259 that require more than one insn. */
15260 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15261 base = base_plus;
15262 offset = lo;
15263 }
15264 }
15265
15266 if (BYTES_BIG_ENDIAN)
15267 {
15268 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15269 plus_constant (Pmode, base,
15270 offset + 1)),
15271 gen_lowpart (QImode, outval)));
15272 emit_insn (gen_lshrsi3 (scratch,
15273 gen_rtx_SUBREG (SImode, outval, 0),
15274 GEN_INT (8)));
15275 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15276 offset)),
15277 gen_lowpart (QImode, scratch)));
15278 }
15279 else
15280 {
15281 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15282 offset)),
15283 gen_lowpart (QImode, outval)));
15284 emit_insn (gen_lshrsi3 (scratch,
15285 gen_rtx_SUBREG (SImode, outval, 0),
15286 GEN_INT (8)));
15287 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15288 plus_constant (Pmode, base,
15289 offset + 1)),
15290 gen_lowpart (QImode, scratch)));
15291 }
15292 }
15293
15294 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15295 (padded to the size of a word) should be passed in a register. */
15296
15297 static bool
15298 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15299 {
15300 if (TARGET_AAPCS_BASED)
15301 return must_pass_in_stack_var_size (mode, type);
15302 else
15303 return must_pass_in_stack_var_size_or_pad (mode, type);
15304 }
15305
15306
15307 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15308 byte of a stack argument has useful data. For legacy APCS ABIs we use
15309 the default. For AAPCS based ABIs small aggregate types are placed
15310 in the lowest memory address. */
15311
15312 static pad_direction
15313 arm_function_arg_padding (machine_mode mode, const_tree type)
15314 {
15315 if (!TARGET_AAPCS_BASED)
15316 return default_function_arg_padding (mode, type);
15317
15318 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15319 return PAD_DOWNWARD;
15320
15321 return PAD_UPWARD;
15322 }
15323
15324
15325 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15326 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15327 register has useful data, and return the opposite if the most
15328 significant byte does. */
15329
15330 bool
15331 arm_pad_reg_upward (machine_mode mode,
15332 tree type, int first ATTRIBUTE_UNUSED)
15333 {
15334 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15335 {
15336 /* For AAPCS, small aggregates, small fixed-point types,
15337 and small complex types are always padded upwards. */
15338 if (type)
15339 {
15340 if ((AGGREGATE_TYPE_P (type)
15341 || TREE_CODE (type) == COMPLEX_TYPE
15342 || FIXED_POINT_TYPE_P (type))
15343 && int_size_in_bytes (type) <= 4)
15344 return true;
15345 }
15346 else
15347 {
15348 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15349 && GET_MODE_SIZE (mode) <= 4)
15350 return true;
15351 }
15352 }
15353
15354 /* Otherwise, use default padding. */
15355 return !BYTES_BIG_ENDIAN;
15356 }
15357
15358 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15359 assuming that the address in the base register is word aligned. */
15360 bool
15361 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15362 {
15363 HOST_WIDE_INT max_offset;
15364
15365 /* Offset must be a multiple of 4 in Thumb mode. */
15366 if (TARGET_THUMB2 && ((offset & 3) != 0))
15367 return false;
15368
15369 if (TARGET_THUMB2)
15370 max_offset = 1020;
15371 else if (TARGET_ARM)
15372 max_offset = 255;
15373 else
15374 return false;
15375
15376 return ((offset <= max_offset) && (offset >= -max_offset));
15377 }
15378
15379 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15380 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15381 Assumes that the address in the base register RN is word aligned. Pattern
15382 guarantees that both memory accesses use the same base register,
15383 the offsets are constants within the range, and the gap between the offsets is 4.
15384 If reload is complete then check that the registers are legal. WBACK indicates whether
15385 address is updated. LOAD indicates whether memory access is load or store. */
15386 bool
15387 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15388 bool wback, bool load)
15389 {
15390 unsigned int t, t2, n;
15391
15392 if (!reload_completed)
15393 return true;
15394
15395 if (!offset_ok_for_ldrd_strd (offset))
15396 return false;
15397
15398 t = REGNO (rt);
15399 t2 = REGNO (rt2);
15400 n = REGNO (rn);
15401
15402 if ((TARGET_THUMB2)
15403 && ((wback && (n == t || n == t2))
15404 || (t == SP_REGNUM)
15405 || (t == PC_REGNUM)
15406 || (t2 == SP_REGNUM)
15407 || (t2 == PC_REGNUM)
15408 || (!load && (n == PC_REGNUM))
15409 || (load && (t == t2))
15410 /* Triggers Cortex-M3 LDRD errata. */
15411 || (!wback && load && fix_cm3_ldrd && (n == t))))
15412 return false;
15413
15414 if ((TARGET_ARM)
15415 && ((wback && (n == t || n == t2))
15416 || (t2 == PC_REGNUM)
15417 || (t % 2 != 0) /* First destination register is not even. */
15418 || (t2 != t + 1)
15419 /* PC can be used as a base register (for offset addressing only),
15420 but it is deprecated. */
15421 || (n == PC_REGNUM)))
15422 return false;
15423
15424 return true;
15425 }
15426
15427 /* Return true if a 64-bit access with alignment ALIGN and with a
15428 constant offset OFFSET from the base pointer is permitted on this
15429 architecture. */
15430 static bool
15431 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15432 {
15433 return (unaligned_access
15434 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15435 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15436 }
15437
15438 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15439 operand MEM's address contains an immediate offset from the base
15440 register and has no side effects, in which case it sets BASE,
15441 OFFSET and ALIGN accordingly. */
15442 static bool
15443 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15444 {
15445 rtx addr;
15446
15447 gcc_assert (base != NULL && offset != NULL);
15448
15449 /* TODO: Handle more general memory operand patterns, such as
15450 PRE_DEC and PRE_INC. */
15451
15452 if (side_effects_p (mem))
15453 return false;
15454
15455 /* Can't deal with subregs. */
15456 if (GET_CODE (mem) == SUBREG)
15457 return false;
15458
15459 gcc_assert (MEM_P (mem));
15460
15461 *offset = const0_rtx;
15462 *align = MEM_ALIGN (mem);
15463
15464 addr = XEXP (mem, 0);
15465
15466 /* If addr isn't valid for DImode, then we can't handle it. */
15467 if (!arm_legitimate_address_p (DImode, addr,
15468 reload_in_progress || reload_completed))
15469 return false;
15470
15471 if (REG_P (addr))
15472 {
15473 *base = addr;
15474 return true;
15475 }
15476 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15477 {
15478 *base = XEXP (addr, 0);
15479 *offset = XEXP (addr, 1);
15480 return (REG_P (*base) && CONST_INT_P (*offset));
15481 }
15482
15483 return false;
15484 }
15485
15486 /* Called from a peephole2 to replace two word-size accesses with a
15487 single LDRD/STRD instruction. Returns true iff we can generate a
15488 new instruction sequence. That is, both accesses use the same base
15489 register and the gap between constant offsets is 4. This function
15490 may reorder its operands to match ldrd/strd RTL templates.
15491 OPERANDS are the operands found by the peephole matcher;
15492 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15493 corresponding memory operands. LOAD indicates whether the access
15494 is load or store. CONST_STORE indicates a store of constant
15495 integer values held in OPERANDS[4,5] and assumes that the pattern
15496 is 4 insns long, for the purpose of checking dead registers.
15497 COMMUTE indicates that register operands may be reordered. */
15498 bool
15499 gen_operands_ldrd_strd (rtx *operands, bool load,
15500 bool const_store, bool commute)
15501 {
15502 int nops = 2;
15503 HOST_WIDE_INT offsets[2], offset, align[2];
15504 rtx base = NULL_RTX;
15505 rtx cur_base, cur_offset, tmp;
15506 int i, gap;
15507 HARD_REG_SET regset;
15508
15509 gcc_assert (!const_store || !load);
15510 /* Check that the memory references are immediate offsets from the
15511 same base register. Extract the base register, the destination
15512 registers, and the corresponding memory offsets. */
15513 for (i = 0; i < nops; i++)
15514 {
15515 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15516 &align[i]))
15517 return false;
15518
15519 if (i == 0)
15520 base = cur_base;
15521 else if (REGNO (base) != REGNO (cur_base))
15522 return false;
15523
15524 offsets[i] = INTVAL (cur_offset);
15525 if (GET_CODE (operands[i]) == SUBREG)
15526 {
15527 tmp = SUBREG_REG (operands[i]);
15528 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15529 operands[i] = tmp;
15530 }
15531 }
15532
15533 /* Make sure there is no dependency between the individual loads. */
15534 if (load && REGNO (operands[0]) == REGNO (base))
15535 return false; /* RAW */
15536
15537 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15538 return false; /* WAW */
15539
15540 /* If the same input register is used in both stores
15541 when storing different constants, try to find a free register.
15542 For example, the code
15543 mov r0, 0
15544 str r0, [r2]
15545 mov r0, 1
15546 str r0, [r2, #4]
15547 can be transformed into
15548 mov r1, 0
15549 mov r0, 1
15550 strd r1, r0, [r2]
15551 in Thumb mode assuming that r1 is free.
15552 For ARM mode do the same but only if the starting register
15553 can be made to be even. */
15554 if (const_store
15555 && REGNO (operands[0]) == REGNO (operands[1])
15556 && INTVAL (operands[4]) != INTVAL (operands[5]))
15557 {
15558 if (TARGET_THUMB2)
15559 {
15560 CLEAR_HARD_REG_SET (regset);
15561 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15562 if (tmp == NULL_RTX)
15563 return false;
15564
15565 /* Use the new register in the first load to ensure that
15566 if the original input register is not dead after peephole,
15567 then it will have the correct constant value. */
15568 operands[0] = tmp;
15569 }
15570 else if (TARGET_ARM)
15571 {
15572 int regno = REGNO (operands[0]);
15573 if (!peep2_reg_dead_p (4, operands[0]))
15574 {
15575 /* When the input register is even and is not dead after the
15576 pattern, it has to hold the second constant but we cannot
15577 form a legal STRD in ARM mode with this register as the second
15578 register. */
15579 if (regno % 2 == 0)
15580 return false;
15581
15582 /* Is regno-1 free? */
15583 SET_HARD_REG_SET (regset);
15584 CLEAR_HARD_REG_BIT(regset, regno - 1);
15585 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15586 if (tmp == NULL_RTX)
15587 return false;
15588
15589 operands[0] = tmp;
15590 }
15591 else
15592 {
15593 /* Find a DImode register. */
15594 CLEAR_HARD_REG_SET (regset);
15595 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15596 if (tmp != NULL_RTX)
15597 {
15598 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15599 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15600 }
15601 else
15602 {
15603 /* Can we use the input register to form a DI register? */
15604 SET_HARD_REG_SET (regset);
15605 CLEAR_HARD_REG_BIT(regset,
15606 regno % 2 == 0 ? regno + 1 : regno - 1);
15607 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15608 if (tmp == NULL_RTX)
15609 return false;
15610 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15611 }
15612 }
15613
15614 gcc_assert (operands[0] != NULL_RTX);
15615 gcc_assert (operands[1] != NULL_RTX);
15616 gcc_assert (REGNO (operands[0]) % 2 == 0);
15617 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15618 }
15619 }
15620
15621 /* Make sure the instructions are ordered with lower memory access first. */
15622 if (offsets[0] > offsets[1])
15623 {
15624 gap = offsets[0] - offsets[1];
15625 offset = offsets[1];
15626
15627 /* Swap the instructions such that lower memory is accessed first. */
15628 std::swap (operands[0], operands[1]);
15629 std::swap (operands[2], operands[3]);
15630 std::swap (align[0], align[1]);
15631 if (const_store)
15632 std::swap (operands[4], operands[5]);
15633 }
15634 else
15635 {
15636 gap = offsets[1] - offsets[0];
15637 offset = offsets[0];
15638 }
15639
15640 /* Make sure accesses are to consecutive memory locations. */
15641 if (gap != 4)
15642 return false;
15643
15644 if (!align_ok_ldrd_strd (align[0], offset))
15645 return false;
15646
15647 /* Make sure we generate legal instructions. */
15648 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15649 false, load))
15650 return true;
15651
15652 /* In Thumb state, where registers are almost unconstrained, there
15653 is little hope to fix it. */
15654 if (TARGET_THUMB2)
15655 return false;
15656
15657 if (load && commute)
15658 {
15659 /* Try reordering registers. */
15660 std::swap (operands[0], operands[1]);
15661 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15662 false, load))
15663 return true;
15664 }
15665
15666 if (const_store)
15667 {
15668 /* If input registers are dead after this pattern, they can be
15669 reordered or replaced by other registers that are free in the
15670 current pattern. */
15671 if (!peep2_reg_dead_p (4, operands[0])
15672 || !peep2_reg_dead_p (4, operands[1]))
15673 return false;
15674
15675 /* Try to reorder the input registers. */
15676 /* For example, the code
15677 mov r0, 0
15678 mov r1, 1
15679 str r1, [r2]
15680 str r0, [r2, #4]
15681 can be transformed into
15682 mov r1, 0
15683 mov r0, 1
15684 strd r0, r1, [r2]
15685 */
15686 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15687 false, false))
15688 {
15689 std::swap (operands[0], operands[1]);
15690 return true;
15691 }
15692
15693 /* Try to find a free DI register. */
15694 CLEAR_HARD_REG_SET (regset);
15695 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15696 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15697 while (true)
15698 {
15699 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15700 if (tmp == NULL_RTX)
15701 return false;
15702
15703 /* DREG must be an even-numbered register in DImode.
15704 Split it into SI registers. */
15705 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15706 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15707 gcc_assert (operands[0] != NULL_RTX);
15708 gcc_assert (operands[1] != NULL_RTX);
15709 gcc_assert (REGNO (operands[0]) % 2 == 0);
15710 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15711
15712 return (operands_ok_ldrd_strd (operands[0], operands[1],
15713 base, offset,
15714 false, load));
15715 }
15716 }
15717
15718 return false;
15719 }
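
/* Example of the transformation driven by this function (register numbers
   assumed): the peephole can turn

       ldr  r1, [r4, #4]
       ldr  r0, [r4]

   into

       ldrd r0, r1, [r4]

   after sorting the accesses by offset; the same pair is rejected if, for
   instance, the first destination is the base register itself (a RAW
   dependency) or, in ARM state, if the registers cannot be made an
   even/odd pair.  */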
15720
15721
15722
15723 \f
15724 /* Print a symbolic form of X to the debug file, F. */
15725 static void
15726 arm_print_value (FILE *f, rtx x)
15727 {
15728 switch (GET_CODE (x))
15729 {
15730 case CONST_INT:
15731 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15732 return;
15733
15734 case CONST_DOUBLE:
15735 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15736 return;
15737
15738 case CONST_VECTOR:
15739 {
15740 int i;
15741
15742 fprintf (f, "<");
15743 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15744 {
15745 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15746 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15747 fputc (',', f);
15748 }
15749 fprintf (f, ">");
15750 }
15751 return;
15752
15753 case CONST_STRING:
15754 fprintf (f, "\"%s\"", XSTR (x, 0));
15755 return;
15756
15757 case SYMBOL_REF:
15758 fprintf (f, "`%s'", XSTR (x, 0));
15759 return;
15760
15761 case LABEL_REF:
15762 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15763 return;
15764
15765 case CONST:
15766 arm_print_value (f, XEXP (x, 0));
15767 return;
15768
15769 case PLUS:
15770 arm_print_value (f, XEXP (x, 0));
15771 fprintf (f, "+");
15772 arm_print_value (f, XEXP (x, 1));
15773 return;
15774
15775 case PC:
15776 fprintf (f, "pc");
15777 return;
15778
15779 default:
15780 fprintf (f, "????");
15781 return;
15782 }
15783 }
15784 \f
15785 /* Routines for manipulation of the constant pool. */
15786
15787 /* Arm instructions cannot load a large constant directly into a
15788 register; they have to come from a pc relative load. The constant
15789 must therefore be placed in the addressable range of the pc
15790 relative load. Depending on the precise pc relative load
15791 instruction the range is somewhere between 256 bytes and 4k. This
15792 means that we often have to dump a constant inside a function, and
15793 generate code to branch around it.
15794
15795 It is important to minimize this, since the branches will slow
15796 things down and make the code larger.
15797
15798 Normally we can hide the table after an existing unconditional
15799 branch so that there is no interruption of the flow, but in the
15800 worst case the code looks like this:
15801
15802 ldr rn, L1
15803 ...
15804 b L2
15805 align
15806 L1: .long value
15807 L2:
15808 ...
15809
15810 ldr rn, L3
15811 ...
15812 b L4
15813 align
15814 L3: .long value
15815 L4:
15816 ...
15817
15818 We fix this by performing a scan after scheduling, which notices
15819 which instructions need to have their operands fetched from the
15820 constant table and builds the table.
15821
15822 The algorithm starts by building a table of all the constants that
15823 need fixing up and all the natural barriers in the function (places
15824 where a constant table can be dropped without breaking the flow).
15825 For each fixup we note how far the pc-relative replacement will be
15826 able to reach and the offset of the instruction into the function.
15827
15828 Having built the table we then group the fixes together to form
15829 tables that are as large as possible (subject to addressing
15830 constraints) and emit each table of constants after the last
15831 barrier that is within range of all the instructions in the group.
15832 If a group does not contain a barrier, then we forcibly create one
15833 by inserting a jump instruction into the flow. Once the table has
15834 been inserted, the insns are then modified to reference the
15835 relevant entry in the pool.
15836
15837 Possible enhancements to the algorithm (not implemented) are:
15838
15839 1) For some processors and object formats, there may be benefit in
15840 aligning the pools to the start of cache lines; this alignment
15841 would need to be taken into account when calculating addressability
15842 of a pool. */
15843
15844 /* These typedefs are located at the start of this file, so that
15845 they can be used in the prototypes there. This comment is to
15846 remind readers of that fact so that the following structures
15847 can be understood more easily.
15848
15849 typedef struct minipool_node Mnode;
15850 typedef struct minipool_fixup Mfix; */
15851
15852 struct minipool_node
15853 {
15854 /* Doubly linked chain of entries. */
15855 Mnode * next;
15856 Mnode * prev;
15857 /* The maximum offset into the code at which this entry can be placed. While
15858 pushing fixes for forward references, all entries are sorted in order
15859 of increasing max_address. */
15860 HOST_WIDE_INT max_address;
15861 /* Similarly for an entry inserted for a backwards ref. */
15862 HOST_WIDE_INT min_address;
15863 /* The number of fixes referencing this entry. This can become zero
15864 if we "unpush" an entry. In this case we ignore the entry when we
15865 come to emit the code. */
15866 int refcount;
15867 /* The offset from the start of the minipool. */
15868 HOST_WIDE_INT offset;
15869 /* The value in the table. */
15870 rtx value;
15871 /* The mode of value. */
15872 machine_mode mode;
15873 /* The size of the value. With iWMMXt enabled
15874 sizes > 4 also imply an alignment of 8 bytes. */
15875 int fix_size;
15876 };
15877
15878 struct minipool_fixup
15879 {
15880 Mfix * next;
15881 rtx_insn * insn;
15882 HOST_WIDE_INT address;
15883 rtx * loc;
15884 machine_mode mode;
15885 int fix_size;
15886 rtx value;
15887 Mnode * minipool;
15888 HOST_WIDE_INT forwards;
15889 HOST_WIDE_INT backwards;
15890 };
15891
15892 /* Fixes less than a word need padding out to a word boundary. */
15893 #define MINIPOOL_FIX_SIZE(mode) \
15894 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15895
15896 static Mnode * minipool_vector_head;
15897 static Mnode * minipool_vector_tail;
15898 static rtx_code_label *minipool_vector_label;
15899 static int minipool_pad;
15900
15901 /* The linked list of all minipool fixes required for this function. */
15902 Mfix * minipool_fix_head;
15903 Mfix * minipool_fix_tail;
15904 /* The fix entry for the current minipool, once it has been placed. */
15905 Mfix * minipool_barrier;
15906
15907 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15908 #define JUMP_TABLES_IN_TEXT_SECTION 0
15909 #endif
15910
15911 static HOST_WIDE_INT
15912 get_jump_table_size (rtx_jump_table_data *insn)
15913 {
15914 /* ADDR_VECs only take room if read-only data goes into the text
15915 section. */
15916 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15917 {
15918 rtx body = PATTERN (insn);
15919 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15920 HOST_WIDE_INT size;
15921 HOST_WIDE_INT modesize;
15922
15923 modesize = GET_MODE_SIZE (GET_MODE (body));
15924 size = modesize * XVECLEN (body, elt);
15925 switch (modesize)
15926 {
15927 case 1:
15928 /* Round up size of TBB table to a halfword boundary. */
15929 size = (size + 1) & ~HOST_WIDE_INT_1;
15930 break;
15931 case 2:
15932 /* No padding necessary for TBH. */
15933 break;
15934 case 4:
15935 /* Add two bytes for alignment on Thumb. */
15936 if (TARGET_THUMB)
15937 size += 2;
15938 break;
15939 default:
15940 gcc_unreachable ();
15941 }
15942 return size;
15943 }
15944
15945 return 0;
15946 }
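
/* Worked examples (assuming jump tables are placed in the text section):
   a TBB-style ADDR_DIFF_VEC with 5 QImode entries occupies 5 bytes, rounded
   up to 6 for halfword alignment; a Thumb ADDR_VEC of 3 SImode entries
   occupies 3 * 4 + 2 = 14 bytes, the extra 2 covering alignment.  */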
15947
15948 /* Return the maximum amount of padding that will be inserted before
15949 label LABEL. */
15950
15951 static HOST_WIDE_INT
15952 get_label_padding (rtx label)
15953 {
15954 HOST_WIDE_INT align, min_insn_size;
15955
15956 align = 1 << label_to_alignment (label);
15957 min_insn_size = TARGET_THUMB ? 2 : 4;
15958 return align > min_insn_size ? align - min_insn_size : 0;
15959 }
15960
15961 /* Move a minipool fix MP from its current location to before MAX_MP.
15962 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15963 constraints may need updating. */
15964 static Mnode *
15965 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15966 HOST_WIDE_INT max_address)
15967 {
15968 /* The code below assumes these are different. */
15969 gcc_assert (mp != max_mp);
15970
15971 if (max_mp == NULL)
15972 {
15973 if (max_address < mp->max_address)
15974 mp->max_address = max_address;
15975 }
15976 else
15977 {
15978 if (max_address > max_mp->max_address - mp->fix_size)
15979 mp->max_address = max_mp->max_address - mp->fix_size;
15980 else
15981 mp->max_address = max_address;
15982
15983 /* Unlink MP from its current position. Since max_mp is non-null,
15984 mp->prev must be non-null. */
15985 mp->prev->next = mp->next;
15986 if (mp->next != NULL)
15987 mp->next->prev = mp->prev;
15988 else
15989 minipool_vector_tail = mp->prev;
15990
15991 /* Re-insert it before MAX_MP. */
15992 mp->next = max_mp;
15993 mp->prev = max_mp->prev;
15994 max_mp->prev = mp;
15995
15996 if (mp->prev != NULL)
15997 mp->prev->next = mp;
15998 else
15999 minipool_vector_head = mp;
16000 }
16001
16002 /* Save the new entry. */
16003 max_mp = mp;
16004
16005 /* Scan over the preceding entries and adjust their addresses as
16006 required. */
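/* Each entry must be emitted before the one that follows it, so an entry's
   max_address can never exceed the following entry's max_address minus the
   earlier entry's own size; walk backwards tightening the constraints
   until they are all consistent.  */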
16007 while (mp->prev != NULL
16008 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16009 {
16010 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16011 mp = mp->prev;
16012 }
16013
16014 return max_mp;
16015 }
16016
16017 /* Add a constant to the minipool for a forward reference. Returns the
16018 node added or NULL if the constant will not fit in this pool. */
16019 static Mnode *
16020 add_minipool_forward_ref (Mfix *fix)
16021 {
16022 /* If set, max_mp is the first pool_entry that has a lower
16023 constraint than the one we are trying to add. */
16024 Mnode * max_mp = NULL;
16025 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16026 Mnode * mp;
16027
16028 /* If the minipool starts before the end of FIX->INSN then this FIX
16029 can not be placed into the current pool. Furthermore, adding the
16030 new constant pool entry may cause the pool to start FIX_SIZE bytes
16031 earlier. */
16032 if (minipool_vector_head &&
16033 (fix->address + get_attr_length (fix->insn)
16034 >= minipool_vector_head->max_address - fix->fix_size))
16035 return NULL;
16036
16037 /* Scan the pool to see if a constant with the same value has
16038 already been added. While we are doing this, also note the
16039 location where we must insert the constant if it doesn't already
16040 exist. */
16041 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16042 {
16043 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16044 && fix->mode == mp->mode
16045 && (!LABEL_P (fix->value)
16046 || (CODE_LABEL_NUMBER (fix->value)
16047 == CODE_LABEL_NUMBER (mp->value)))
16048 && rtx_equal_p (fix->value, mp->value))
16049 {
16050 /* More than one fix references this entry. */
16051 mp->refcount++;
16052 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16053 }
16054
16055 /* Note the insertion point if necessary. */
16056 if (max_mp == NULL
16057 && mp->max_address > max_address)
16058 max_mp = mp;
16059
16060 /* If we are inserting an 8-byte aligned quantity and
16061 we have not already found an insertion point, then
16062 make sure that all such 8-byte aligned quantities are
16063 placed at the start of the pool. */
16064 if (ARM_DOUBLEWORD_ALIGN
16065 && max_mp == NULL
16066 && fix->fix_size >= 8
16067 && mp->fix_size < 8)
16068 {
16069 max_mp = mp;
16070 max_address = mp->max_address;
16071 }
16072 }
16073
16074 /* The value is not currently in the minipool, so we need to create
16075 a new entry for it. If MAX_MP is NULL, the entry will be put on
16076 the end of the list since the placement is less constrained than
16077 any existing entry. Otherwise, we insert the new fix before
16078 MAX_MP and, if necessary, adjust the constraints on the other
16079 entries. */
16080 mp = XNEW (Mnode);
16081 mp->fix_size = fix->fix_size;
16082 mp->mode = fix->mode;
16083 mp->value = fix->value;
16084 mp->refcount = 1;
16085 /* Not yet required for a backwards ref. */
16086 mp->min_address = -65536;
16087
16088 if (max_mp == NULL)
16089 {
16090 mp->max_address = max_address;
16091 mp->next = NULL;
16092 mp->prev = minipool_vector_tail;
16093
16094 if (mp->prev == NULL)
16095 {
16096 minipool_vector_head = mp;
16097 minipool_vector_label = gen_label_rtx ();
16098 }
16099 else
16100 mp->prev->next = mp;
16101
16102 minipool_vector_tail = mp;
16103 }
16104 else
16105 {
16106 if (max_address > max_mp->max_address - mp->fix_size)
16107 mp->max_address = max_mp->max_address - mp->fix_size;
16108 else
16109 mp->max_address = max_address;
16110
16111 mp->next = max_mp;
16112 mp->prev = max_mp->prev;
16113 max_mp->prev = mp;
16114 if (mp->prev != NULL)
16115 mp->prev->next = mp;
16116 else
16117 minipool_vector_head = mp;
16118 }
16119
16120 /* Save the new entry. */
16121 max_mp = mp;
16122
16123 /* Scan over the preceding entries and adjust their addresses as
16124 required. */
16125 while (mp->prev != NULL
16126 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16127 {
16128 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16129 mp = mp->prev;
16130 }
16131
16132 return max_mp;
16133 }
16134
16135 static Mnode *
16136 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16137 HOST_WIDE_INT min_address)
16138 {
16139 HOST_WIDE_INT offset;
16140
16141 /* The code below assumes these are different. */
16142 gcc_assert (mp != min_mp);
16143
16144 if (min_mp == NULL)
16145 {
16146 if (min_address > mp->min_address)
16147 mp->min_address = min_address;
16148 }
16149 else
16150 {
16151 /* We will adjust this below if it is too loose. */
16152 mp->min_address = min_address;
16153
16154 /* Unlink MP from its current position. Since min_mp is non-null,
16155 mp->next must be non-null. */
16156 mp->next->prev = mp->prev;
16157 if (mp->prev != NULL)
16158 mp->prev->next = mp->next;
16159 else
16160 minipool_vector_head = mp->next;
16161
16162 /* Reinsert it after MIN_MP. */
16163 mp->prev = min_mp;
16164 mp->next = min_mp->next;
16165 min_mp->next = mp;
16166 if (mp->next != NULL)
16167 mp->next->prev = mp;
16168 else
16169 minipool_vector_tail = mp;
16170 }
16171
16172 min_mp = mp;
16173
16174 offset = 0;
16175 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16176 {
16177 mp->offset = offset;
16178 if (mp->refcount > 0)
16179 offset += mp->fix_size;
16180
16181 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16182 mp->next->min_address = mp->min_address + mp->fix_size;
16183 }
16184
16185 return min_mp;
16186 }
16187
16188 /* Add a constant to the minipool for a backward reference. Returns the
16189 node added or NULL if the constant will not fit in this pool.
16190
16191 Note that the insertion code for a backwards reference can be
16192 somewhat confusing because the calculated offsets for each fix do
16193 not take into account the size of the pool (which is still under
16194 construction). */
16195 static Mnode *
16196 add_minipool_backward_ref (Mfix *fix)
16197 {
16198 /* If set, min_mp is the last pool_entry that has a lower constraint
16199 than the one we are trying to add. */
16200 Mnode *min_mp = NULL;
16201 /* This can be negative, since it is only a constraint. */
16202 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16203 Mnode *mp;
16204
16205 /* If we can't reach the current pool from this insn, or if we can't
16206 insert this entry at the end of the pool without pushing other
16207 fixes out of range, then we don't try. This ensures that we
16208 can't fail later on. */
16209 if (min_address >= minipool_barrier->address
16210 || (minipool_vector_tail->min_address + fix->fix_size
16211 >= minipool_barrier->address))
16212 return NULL;
16213
16214 /* Scan the pool to see if a constant with the same value has
16215 already been added. While we are doing this, also note the
16216 location where we must insert the constant if it doesn't already
16217 exist. */
16218 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16219 {
16220 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16221 && fix->mode == mp->mode
16222 && (!LABEL_P (fix->value)
16223 || (CODE_LABEL_NUMBER (fix->value)
16224 == CODE_LABEL_NUMBER (mp->value)))
16225 && rtx_equal_p (fix->value, mp->value)
16226 /* Check that there is enough slack to move this entry to the
16227 end of the table (this is conservative). */
16228 && (mp->max_address
16229 > (minipool_barrier->address
16230 + minipool_vector_tail->offset
16231 + minipool_vector_tail->fix_size)))
16232 {
16233 mp->refcount++;
16234 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16235 }
16236
16237 if (min_mp != NULL)
16238 mp->min_address += fix->fix_size;
16239 else
16240 {
16241 /* Note the insertion point if necessary. */
16242 if (mp->min_address < min_address)
16243 {
16244 /* For now, we do not allow the insertion of 8-byte alignment
16245 requiring nodes anywhere but at the start of the pool. */
16246 if (ARM_DOUBLEWORD_ALIGN
16247 && fix->fix_size >= 8 && mp->fix_size < 8)
16248 return NULL;
16249 else
16250 min_mp = mp;
16251 }
16252 else if (mp->max_address
16253 < minipool_barrier->address + mp->offset + fix->fix_size)
16254 {
16255 /* Inserting before this entry would push the fix beyond
16256 its maximum address (which can happen if we have
16257 re-located a forwards fix); force the new fix to come
16258 after it. */
16259 if (ARM_DOUBLEWORD_ALIGN
16260 && fix->fix_size >= 8 && mp->fix_size < 8)
16261 return NULL;
16262 else
16263 {
16264 min_mp = mp;
16265 min_address = mp->min_address + fix->fix_size;
16266 }
16267 }
16268 /* Do not insert a non-8-byte aligned quantity before 8-byte
16269 aligned quantities. */
16270 else if (ARM_DOUBLEWORD_ALIGN
16271 && fix->fix_size < 8
16272 && mp->fix_size >= 8)
16273 {
16274 min_mp = mp;
16275 min_address = mp->min_address + fix->fix_size;
16276 }
16277 }
16278 }
16279
16280 /* We need to create a new entry. */
16281 mp = XNEW (Mnode);
16282 mp->fix_size = fix->fix_size;
16283 mp->mode = fix->mode;
16284 mp->value = fix->value;
16285 mp->refcount = 1;
16286 mp->max_address = minipool_barrier->address + 65536;
16287
16288 mp->min_address = min_address;
16289
16290 if (min_mp == NULL)
16291 {
16292 mp->prev = NULL;
16293 mp->next = minipool_vector_head;
16294
16295 if (mp->next == NULL)
16296 {
16297 minipool_vector_tail = mp;
16298 minipool_vector_label = gen_label_rtx ();
16299 }
16300 else
16301 mp->next->prev = mp;
16302
16303 minipool_vector_head = mp;
16304 }
16305 else
16306 {
16307 mp->next = min_mp->next;
16308 mp->prev = min_mp;
16309 min_mp->next = mp;
16310
16311 if (mp->next != NULL)
16312 mp->next->prev = mp;
16313 else
16314 minipool_vector_tail = mp;
16315 }
16316
16317 /* Save the new entry. */
16318 min_mp = mp;
16319
16320 if (mp->prev)
16321 mp = mp->prev;
16322 else
16323 mp->offset = 0;
16324
16325 /* Scan over the following entries and adjust their offsets. */
16326 while (mp->next != NULL)
16327 {
16328 if (mp->next->min_address < mp->min_address + mp->fix_size)
16329 mp->next->min_address = mp->min_address + mp->fix_size;
16330
16331 if (mp->refcount)
16332 mp->next->offset = mp->offset + mp->fix_size;
16333 else
16334 mp->next->offset = mp->offset;
16335
16336 mp = mp->next;
16337 }
16338
16339 return min_mp;
16340 }
16341
16342 static void
16343 assign_minipool_offsets (Mfix *barrier)
16344 {
16345 HOST_WIDE_INT offset = 0;
16346 Mnode *mp;
16347
16348 minipool_barrier = barrier;
16349
16350 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16351 {
16352 mp->offset = offset;
16353
16354 if (mp->refcount > 0)
16355 offset += mp->fix_size;
16356 }
16357 }
16358
16359 /* Output the literal table. */
16360 static void
16361 dump_minipool (rtx_insn *scan)
16362 {
16363 Mnode * mp;
16364 Mnode * nmp;
16365 int align64 = 0;
16366
16367 if (ARM_DOUBLEWORD_ALIGN)
16368 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16369 if (mp->refcount > 0 && mp->fix_size >= 8)
16370 {
16371 align64 = 1;
16372 break;
16373 }
16374
16375 if (dump_file)
16376 fprintf (dump_file,
16377 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16378 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16379
16380 scan = emit_label_after (gen_label_rtx (), scan);
16381 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16382 scan = emit_label_after (minipool_vector_label, scan);
16383
16384 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16385 {
16386 if (mp->refcount > 0)
16387 {
16388 if (dump_file)
16389 {
16390 fprintf (dump_file,
16391 ";; Offset %u, min %ld, max %ld ",
16392 (unsigned) mp->offset, (unsigned long) mp->min_address,
16393 (unsigned long) mp->max_address);
16394 arm_print_value (dump_file, mp->value);
16395 fputc ('\n', dump_file);
16396 }
16397
16398 rtx val = copy_rtx (mp->value);
16399
16400 switch (GET_MODE_SIZE (mp->mode))
16401 {
16402 #ifdef HAVE_consttable_1
16403 case 1:
16404 scan = emit_insn_after (gen_consttable_1 (val), scan);
16405 break;
16406
16407 #endif
16408 #ifdef HAVE_consttable_2
16409 case 2:
16410 scan = emit_insn_after (gen_consttable_2 (val), scan);
16411 break;
16412
16413 #endif
16414 #ifdef HAVE_consttable_4
16415 case 4:
16416 scan = emit_insn_after (gen_consttable_4 (val), scan);
16417 break;
16418
16419 #endif
16420 #ifdef HAVE_consttable_8
16421 case 8:
16422 scan = emit_insn_after (gen_consttable_8 (val), scan);
16423 break;
16424
16425 #endif
16426 #ifdef HAVE_consttable_16
16427 case 16:
16428 scan = emit_insn_after (gen_consttable_16 (val), scan);
16429 break;
16430
16431 #endif
16432 default:
16433 gcc_unreachable ();
16434 }
16435 }
16436
16437 nmp = mp->next;
16438 free (mp);
16439 }
16440
16441 minipool_vector_head = minipool_vector_tail = NULL;
16442 scan = emit_insn_after (gen_consttable_end (), scan);
16443 scan = emit_barrier_after (scan);
16444 }
16445
16446 /* Return the cost of forcibly inserting a barrier after INSN. */
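/* Lower values denote better places; create_fix_barrier keeps the position
   with the lowest cost seen so far within the allowed range.  */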
16447 static int
16448 arm_barrier_cost (rtx_insn *insn)
16449 {
16450 /* Basing the location of the pool on the loop depth is preferable,
16451 but at the moment, the basic block information seems to be
16452 corrupt by this stage of the compilation. */
16453 int base_cost = 50;
16454 rtx_insn *next = next_nonnote_insn (insn);
16455
16456 if (next != NULL && LABEL_P (next))
16457 base_cost -= 20;
16458
16459 switch (GET_CODE (insn))
16460 {
16461 case CODE_LABEL:
16462 /* It will always be better to place the table before the label, rather
16463 than after it. */
16464 return 50;
16465
16466 case INSN:
16467 case CALL_INSN:
16468 return base_cost;
16469
16470 case JUMP_INSN:
16471 return base_cost - 10;
16472
16473 default:
16474 return base_cost + 10;
16475 }
16476 }
16477
16478 /* Find the best place in the insn stream in the range
16479 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16480 Create the barrier by inserting a jump and add a new fix entry for
16481 it. */
16482 static Mfix *
16483 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16484 {
16485 HOST_WIDE_INT count = 0;
16486 rtx_barrier *barrier;
16487 rtx_insn *from = fix->insn;
16488 /* The instruction after which we will insert the jump. */
16489 rtx_insn *selected = NULL;
16490 int selected_cost;
16491 /* The address at which the jump instruction will be placed. */
16492 HOST_WIDE_INT selected_address;
16493 Mfix * new_fix;
16494 HOST_WIDE_INT max_count = max_address - fix->address;
16495 rtx_code_label *label = gen_label_rtx ();
16496
16497 selected_cost = arm_barrier_cost (from);
16498 selected_address = fix->address;
16499
16500 while (from && count < max_count)
16501 {
16502 rtx_jump_table_data *tmp;
16503 int new_cost;
16504
16505 /* This code shouldn't have been called if there was a natural barrier
16506 within range. */
16507 gcc_assert (!BARRIER_P (from));
16508
16509 /* Count the length of this insn. This must stay in sync with the
16510 code that pushes minipool fixes. */
16511 if (LABEL_P (from))
16512 count += get_label_padding (from);
16513 else
16514 count += get_attr_length (from);
16515
16516 /* If there is a jump table, add its length. */
16517 if (tablejump_p (from, NULL, &tmp))
16518 {
16519 count += get_jump_table_size (tmp);
16520
16521 /* Jump tables aren't in a basic block, so base the cost on
16522 the dispatch insn. If we select this location, we will
16523 still put the pool after the table. */
16524 new_cost = arm_barrier_cost (from);
16525
16526 if (count < max_count
16527 && (!selected || new_cost <= selected_cost))
16528 {
16529 selected = tmp;
16530 selected_cost = new_cost;
16531 selected_address = fix->address + count;
16532 }
16533
16534 /* Continue after the dispatch table. */
16535 from = NEXT_INSN (tmp);
16536 continue;
16537 }
16538
16539 new_cost = arm_barrier_cost (from);
16540
16541 if (count < max_count
16542 && (!selected || new_cost <= selected_cost))
16543 {
16544 selected = from;
16545 selected_cost = new_cost;
16546 selected_address = fix->address + count;
16547 }
16548
16549 from = NEXT_INSN (from);
16550 }
16551
16552 /* Make sure that we found a place to insert the jump. */
16553 gcc_assert (selected);
16554
16555 /* Make sure we do not split a call and its corresponding
16556 CALL_ARG_LOCATION note. */
16557 if (CALL_P (selected))
16558 {
16559 rtx_insn *next = NEXT_INSN (selected);
16560 if (next && NOTE_P (next)
16561 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16562 selected = next;
16563 }
16564
16565 /* Create a new JUMP_INSN that branches around a barrier. */
16566 from = emit_jump_insn_after (gen_jump (label), selected);
16567 JUMP_LABEL (from) = label;
16568 barrier = emit_barrier_after (from);
16569 emit_label_after (label, barrier);
16570
16571 /* Create a minipool barrier entry for the new barrier. */
16572 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16573 new_fix->insn = barrier;
16574 new_fix->address = selected_address;
16575 new_fix->next = fix->next;
16576 fix->next = new_fix;
16577
16578 return new_fix;
16579 }
16580
16581 /* Record that there is a natural barrier in the insn stream at
16582 ADDRESS. */
16583 static void
16584 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16585 {
16586 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16587
16588 fix->insn = insn;
16589 fix->address = address;
16590
16591 fix->next = NULL;
16592 if (minipool_fix_head != NULL)
16593 minipool_fix_tail->next = fix;
16594 else
16595 minipool_fix_head = fix;
16596
16597 minipool_fix_tail = fix;
16598 }
16599
16600 /* Record INSN, which will need fixing up to load a value from the
16601 minipool. ADDRESS is the offset of the insn since the start of the
16602 function; LOC is a pointer to the part of the insn which requires
16603 fixing; VALUE is the constant that must be loaded, which is of type
16604 MODE. */
16605 static void
16606 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16607 machine_mode mode, rtx value)
16608 {
16609 gcc_assert (!arm_disable_literal_pool);
16610 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16611
16612 fix->insn = insn;
16613 fix->address = address;
16614 fix->loc = loc;
16615 fix->mode = mode;
16616 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16617 fix->value = value;
16618 fix->forwards = get_attr_pool_range (insn);
16619 fix->backwards = get_attr_neg_pool_range (insn);
16620 fix->minipool = NULL;
16621
16622 /* If an insn doesn't have a range defined for it, then it isn't
16623 expecting to be reworked by this code. Better to stop now than
16624 to generate duff assembly code. */
16625 gcc_assert (fix->forwards || fix->backwards);
16626
16627 /* If an entry requires 8-byte alignment then assume all constant pools
16628 require 4 bytes of padding. Trying to do this later on a per-pool
16629 basis is awkward because existing pool entries have to be modified. */
16630 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16631 minipool_pad = 4;
16632
16633 if (dump_file)
16634 {
16635 fprintf (dump_file,
16636 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16637 GET_MODE_NAME (mode),
16638 INSN_UID (insn), (unsigned long) address,
16639 -1 * (long)fix->backwards, (long)fix->forwards);
16640 arm_print_value (dump_file, fix->value);
16641 fprintf (dump_file, "\n");
16642 }
16643
16644 /* Add it to the chain of fixes. */
16645 fix->next = NULL;
16646
16647 if (minipool_fix_head != NULL)
16648 minipool_fix_tail->next = fix;
16649 else
16650 minipool_fix_head = fix;
16651
16652 minipool_fix_tail = fix;
16653 }
16654
16655 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16656 constant inline; constants whose arm_const_double_inline_cost exceeds
16657 this value are not built inline. */
16658 int
16659 arm_max_const_double_inline_cost ()
16660 {
16661 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16662 }
16663
16664 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16665 Returns the number of insns needed, or 99 if we don't know how to
16666 do it. */
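/* For example (illustrative constant): for the DImode value
   0x0000000100000005 the low part 5 and the high part 1 are both valid
   single-instruction immediates, so the cost computed below is 1 + 1 == 2,
   which is within arm_max_const_double_inline_cost in either case.  */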
16667 int
16668 arm_const_double_inline_cost (rtx val)
16669 {
16670 rtx lowpart, highpart;
16671 machine_mode mode;
16672
16673 mode = GET_MODE (val);
16674
16675 if (mode == VOIDmode)
16676 mode = DImode;
16677
16678 gcc_assert (GET_MODE_SIZE (mode) == 8);
16679
16680 lowpart = gen_lowpart (SImode, val);
16681 highpart = gen_highpart_mode (SImode, mode, val);
16682
16683 gcc_assert (CONST_INT_P (lowpart));
16684 gcc_assert (CONST_INT_P (highpart));
16685
16686 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16687 NULL_RTX, NULL_RTX, 0, 0)
16688 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16689 NULL_RTX, NULL_RTX, 0, 0));
16690 }
16691
16692 /* Cost of loading a SImode constant. */
16693 static inline int
16694 arm_const_inline_cost (enum rtx_code code, rtx val)
16695 {
16696 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16697 NULL_RTX, NULL_RTX, 1, 0);
16698 }
16699
16700 /* Return true if it is worthwhile to split a 64-bit constant into two
16701 32-bit operations. This is the case if optimizing for size, or
16702 if we have load delay slots, or if one 32-bit part can be done with
16703 a single data operation. */
16704 bool
16705 arm_const_double_by_parts (rtx val)
16706 {
16707 machine_mode mode = GET_MODE (val);
16708 rtx part;
16709
16710 if (optimize_size || arm_ld_sched)
16711 return true;
16712
16713 if (mode == VOIDmode)
16714 mode = DImode;
16715
16716 part = gen_highpart_mode (SImode, mode, val);
16717
16718 gcc_assert (CONST_INT_P (part));
16719
16720 if (const_ok_for_arm (INTVAL (part))
16721 || const_ok_for_arm (~INTVAL (part)))
16722 return true;
16723
16724 part = gen_lowpart (SImode, val);
16725
16726 gcc_assert (CONST_INT_P (part));
16727
16728 if (const_ok_for_arm (INTVAL (part))
16729 || const_ok_for_arm (~INTVAL (part)))
16730 return true;
16731
16732 return false;
16733 }
16734
16735 /* Return true if it is possible to inline both the high and low parts
16736 of a 64-bit constant into 32-bit data processing instructions. */
16737 bool
16738 arm_const_double_by_immediates (rtx val)
16739 {
16740 machine_mode mode = GET_MODE (val);
16741 rtx part;
16742
16743 if (mode == VOIDmode)
16744 mode = DImode;
16745
16746 part = gen_highpart_mode (SImode, mode, val);
16747
16748 gcc_assert (CONST_INT_P (part));
16749
16750 if (!const_ok_for_arm (INTVAL (part)))
16751 return false;
16752
16753 part = gen_lowpart (SImode, val);
16754
16755 gcc_assert (CONST_INT_P (part));
16756
16757 if (!const_ok_for_arm (INTVAL (part)))
16758 return false;
16759
16760 return true;
16761 }
16762
16763 /* Scan INSN and note any of its operands that need fixing.
16764 If DO_PUSHES is false we do not actually push any of the fixups
16765 needed. */
16766 static void
16767 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16768 {
16769 int opno;
16770
16771 extract_constrain_insn (insn);
16772
16773 if (recog_data.n_alternatives == 0)
16774 return;
16775
16776 /* Fill in recog_op_alt with information about the constraints of
16777 this insn. */
16778 preprocess_constraints (insn);
16779
16780 const operand_alternative *op_alt = which_op_alt ();
16781 for (opno = 0; opno < recog_data.n_operands; opno++)
16782 {
16783 /* Things we need to fix can only occur in inputs. */
16784 if (recog_data.operand_type[opno] != OP_IN)
16785 continue;
16786
16787 /* If this alternative is a memory reference, then any mention
16788 of constants in this alternative is really to fool reload
16789 into allowing us to accept one there. We need to fix them up
16790 now so that we output the right code. */
16791 if (op_alt[opno].memory_ok)
16792 {
16793 rtx op = recog_data.operand[opno];
16794
16795 if (CONSTANT_P (op))
16796 {
16797 if (do_pushes)
16798 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16799 recog_data.operand_mode[opno], op);
16800 }
16801 else if (MEM_P (op)
16802 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16803 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16804 {
16805 if (do_pushes)
16806 {
16807 rtx cop = avoid_constant_pool_reference (op);
16808
16809 /* Casting the address of something to a mode narrower
16810 than a word can cause avoid_constant_pool_reference()
16811 to return the pool reference itself. That's no good to
16812 us here. Let's just hope that we can use the
16813 constant pool value directly. */
16814 if (op == cop)
16815 cop = get_pool_constant (XEXP (op, 0));
16816
16817 push_minipool_fix (insn, address,
16818 recog_data.operand_loc[opno],
16819 recog_data.operand_mode[opno], cop);
16820 }
16821
16822 }
16823 }
16824 }
16825
16826 return;
16827 }
16828
16829 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16830 and unions in the context of ARMv8-M Security Extensions. It is used as a
16831 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16832 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16833 or four masks, depending on whether it is being computed for a
16834 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16835 respectively. The tree for the type of the argument or a field within an
16836 argument is passed in ARG_TYPE; the current register this argument or field
16837 starts in is kept in the pointer REGNO and updated accordingly; the bit this
16838 argument or field starts at is passed in STARTING_BIT; and the last used bit
16839 is kept in LAST_USED_BIT, which is also updated accordingly. */
16840
16841 static unsigned HOST_WIDE_INT
16842 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16843 uint32_t * padding_bits_to_clear,
16844 unsigned starting_bit, int * last_used_bit)
16845
16846 {
16847 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16848
16849 if (TREE_CODE (arg_type) == RECORD_TYPE)
16850 {
16851 unsigned current_bit = starting_bit;
16852 tree field;
16853 long int offset, size;
16854
16855
16856 field = TYPE_FIELDS (arg_type);
16857 while (field)
16858 {
16859 /* The offset within a structure is always an offset from
16860 the start of that structure. Make sure we take that into account
16861 in the calculation of the register-based offset used here. */
16862 offset = starting_bit;
16863 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16864 offset %= 32;
16865
16866 /* This is the actual size of the field; for bitfields this is the
16867 bitfield width and not the container size. */
16868 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16869
16870 if (*last_used_bit != offset)
16871 {
16872 if (offset < *last_used_bit)
16873 {
16874 /* This field's offset is before the 'last_used_bit', that
16875 means this field goes on the next register. So we need to
16876 pad the rest of the current register and increase the
16877 register number. */
16878 uint32_t mask;
16879 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16880 mask++;
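/* E.g. with *last_used_bit == 8 this computes
   0xffffffff - 0x100 + 1 == 0xffffff00, i.e. bits 8..31 of the current
   register are recorded as padding.  */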
16881
16882 padding_bits_to_clear[*regno] |= mask;
16883 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16884 (*regno)++;
16885 }
16886 else
16887 {
16888 /* Otherwise we pad the bits between the last field's end and
16889 the start of the new field. */
16890 uint32_t mask;
16891
16892 mask = ((uint32_t)-1) >> (32 - offset);
16893 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
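/* E.g. with *last_used_bit == 8 and offset == 24 this computes
   0x00ffffff - 0xff == 0x00ffff00, marking bits 8..23 (the gap between
   the previous field and this one) as padding.  */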
16894 padding_bits_to_clear[*regno] |= mask;
16895 }
16896 current_bit = offset;
16897 }
16898
16899 /* Calculate further padding bits for inner structs/unions too. */
16900 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16901 {
16902 *last_used_bit = current_bit;
16903 not_to_clear_reg_mask
16904 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16905 padding_bits_to_clear, offset,
16906 last_used_bit);
16907 }
16908 else
16909 {
16910 /* Update 'current_bit' with this field's size. If the
16911 'current_bit' lies in a subsequent register, update 'regno' and
16912 reset 'current_bit' to point to the current bit in that new
16913 register. */
16914 current_bit += size;
16915 while (current_bit >= 32)
16916 {
16917 current_bit-=32;
16918 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16919 (*regno)++;
16920 }
16921 *last_used_bit = current_bit;
16922 }
16923
16924 field = TREE_CHAIN (field);
16925 }
16926 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16927 }
16928 else if (TREE_CODE (arg_type) == UNION_TYPE)
16929 {
16930 tree field, field_t;
16931 int i, regno_t, field_size;
16932 int max_reg = -1;
16933 int max_bit = -1;
16934 uint32_t mask;
16935 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16936 = {-1, -1, -1, -1};
16937
16938 /* To compute the padding bits in a union we only consider bits as
16939 padding bits if, for every field in the union, they are either padding
16940 bits or fall outside that field's size. */
16941 field = TYPE_FIELDS (arg_type);
16942 while (field)
16943 {
16944 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16945 = {0U, 0U, 0U, 0U};
16946 int last_used_bit_t = *last_used_bit;
16947 regno_t = *regno;
16948 field_t = TREE_TYPE (field);
16949
16950 /* If the field's type is either a record or a union make sure to
16951 compute their padding bits too. */
16952 if (RECORD_OR_UNION_TYPE_P (field_t))
16953 not_to_clear_reg_mask
16954 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16955 &padding_bits_to_clear_t[0],
16956 starting_bit, &last_used_bit_t);
16957 else
16958 {
16959 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16960 regno_t = (field_size / 32) + *regno;
16961 last_used_bit_t = (starting_bit + field_size) % 32;
16962 }
16963
16964 for (i = *regno; i < regno_t; i++)
16965 {
16966 /* For all but the last register used by this field only keep the
16967 padding bits that were padding bits in this field. */
16968 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16969 }
16970
16971 /* For the last register, keep all padding bits that were padding
16972 bits in this field and any padding bits that are still valid
16973 as padding bits but fall outside of this field's size. */
16974 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16975 padding_bits_to_clear_res[regno_t]
16976 &= padding_bits_to_clear_t[regno_t] | mask;
16977
16978 /* Update the maximum size of the fields in terms of registers used
16979 ('max_reg') and the 'last_used_bit' in said register. */
16980 if (max_reg < regno_t)
16981 {
16982 max_reg = regno_t;
16983 max_bit = last_used_bit_t;
16984 }
16985 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16986 max_bit = last_used_bit_t;
16987
16988 field = TREE_CHAIN (field);
16989 }
16990
16991 /* Update the current padding_bits_to_clear using the intersection of the
16992 padding bits of all the fields. */
16993 for (i=*regno; i < max_reg; i++)
16994 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16995
16996 /* Do not keep trailing padding bits, we do not know yet whether this
16997 is the end of the argument. */
16998 mask = ((uint32_t) 1 << max_bit) - 1;
16999 padding_bits_to_clear[max_reg]
17000 |= padding_bits_to_clear_res[max_reg] & mask;
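/* E.g. max_bit == 12 gives mask == 0x00000fff, so only padding bits below
   bit 12 are kept here; higher bits are left undecided because a later
   field or argument may still occupy them.  */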
17001
17002 *regno = max_reg;
17003 *last_used_bit = max_bit;
17004 }
17005 else
17006 /* This function should only be used for structs and unions. */
17007 gcc_unreachable ();
17008
17009 return not_to_clear_reg_mask;
17010 }
17011
17012 /* In the context of ARMv8-M Security Extensions, this function is used for both
17013 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17014 registers are used when returning or passing arguments, which is then
17015 returned as a mask. It will also compute a mask to indicate padding/unused
17016 bits for each of these registers, and passes this through the
17017 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17018 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17019 the starting register used to pass this argument or return value is passed
17020 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17021 for struct and union types. */
17022
17023 static unsigned HOST_WIDE_INT
17024 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17025 uint32_t * padding_bits_to_clear)
17026
17027 {
17028 int last_used_bit = 0;
17029 unsigned HOST_WIDE_INT not_to_clear_mask;
17030
17031 if (RECORD_OR_UNION_TYPE_P (arg_type))
17032 {
17033 not_to_clear_mask
17034 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17035 padding_bits_to_clear, 0,
17036 &last_used_bit);
17037
17038
17039 /* If the 'last_used_bit' is not zero, that means we are still using a
17040 part of the last 'regno'. In such cases we must clear the trailing
17041 bits. Otherwise we are not using regno and we should mark it as to
17042 clear. */
17043 if (last_used_bit != 0)
17044 padding_bits_to_clear[regno]
17045 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17046 else
17047 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17048 }
17049 else
17050 {
17051 not_to_clear_mask = 0;
17052 /* We are not dealing with structs or unions, so these arguments may be
17053 passed in floating-point registers too. In some cases BLKmode is
17054 used when returning or passing arguments in multiple VFP registers. */
17055 if (GET_MODE (arg_rtx) == BLKmode)
17056 {
17057 int i, arg_regs;
17058 rtx reg;
17059
17060 /* This should really only occur when dealing with the hard-float
17061 ABI. */
17062 gcc_assert (TARGET_HARD_FLOAT_ABI);
17063
17064 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17065 {
17066 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17067 gcc_assert (REG_P (reg));
17068
17069 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17070
17071 /* If we are dealing with DF mode, make sure we don't
17072 clear either of the registers it addresses. */
17073 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17074 if (arg_regs > 1)
17075 {
17076 unsigned HOST_WIDE_INT mask;
17077 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17078 mask -= HOST_WIDE_INT_1U << REGNO (reg);
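/* The two lines above build a mask of bits REGNO (reg) ..
   REGNO (reg) + arg_regs - 1; e.g. for a two-register DFmode value this
   is 0x3 << REGNO (reg), so neither half of the register pair gets
   cleared.  */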
17079 not_to_clear_mask |= mask;
17080 }
17081 }
17082 }
17083 else
17084 {
17085 /* Otherwise we can rely on the MODE to determine how many registers
17086 are being used by this argument. */
17087 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17088 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17089 if (arg_regs > 1)
17090 {
17091 unsigned HOST_WIDE_INT
17092 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17093 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17094 not_to_clear_mask |= mask;
17095 }
17096 }
17097 }
17098
17099 return not_to_clear_mask;
17100 }
17101
17102 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
17103 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17104 which registers are to be fully cleared, using the value in CLEARING_REG
17105 if more efficient. The PADDING_BITS_TO_CLEAR array of PADDING_BITS_LEN
17106 entries gives the bits that need to be cleared in caller-saved core
17107 registers, with SCRATCH_REG used as a scratch register for that clearing.
17108
17109 NOTE: one of the three following conditions must hold:
17110 - SCRATCH_REG is a low register
17111 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17112 in TO_CLEAR_BITMAP)
17113 - CLEARING_REG is a low register. */
17114
17115 static void
17116 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17117 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17118 {
17119 bool saved_clearing = false;
17120 rtx saved_clearing_reg = NULL_RTX;
17121 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17122
17123 gcc_assert (arm_arch_cmse);
17124
17125 if (!bitmap_empty_p (to_clear_bitmap))
17126 {
17127 minregno = bitmap_first_set_bit (to_clear_bitmap);
17128 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17129 }
17130 clearing_regno = REGNO (clearing_reg);
17131
17132 /* Clear padding bits. */
17133 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17134 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17135 {
17136 uint64_t mask;
17137 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17138
17139 if (padding_bits_to_clear[i] == 0)
17140 continue;
17141
17142 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17143 CLEARING_REG as scratch. */
17144 if (TARGET_THUMB1
17145 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17146 {
17147 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17148 such that we can use clearing_reg to clear the unused bits in the
17149 arguments. */
17150 if ((clearing_regno > maxregno
17151 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17152 && !saved_clearing)
17153 {
17154 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17155 emit_move_insn (scratch_reg, clearing_reg);
17156 saved_clearing = true;
17157 saved_clearing_reg = scratch_reg;
17158 }
17159 scratch_reg = clearing_reg;
17160 }
17161
17162 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17163 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17164 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17165
17166 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17167 mask = (~padding_bits_to_clear[i]) >> 16;
17168 rtx16 = gen_int_mode (16, SImode);
17169 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17170 if (mask)
17171 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17172
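/* SCRATCH_REG now holds the full 32-bit value ~padding_bits_to_clear[i]
   (low half from the move above, high half from the ZERO_EXTRACT store,
   which is skipped when it would be zero), so the AND below clears exactly
   the padding bits of this argument register while leaving the bits that
   carry the argument untouched.  */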
17173 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17174 }
17175 if (saved_clearing)
17176 emit_move_insn (clearing_reg, saved_clearing_reg);
17177
17178
17179 /* Clear full registers. */
17180
17181 /* If not marked for clearing, clearing_reg already does not contain
17182 any secret. */
17183 if (clearing_regno <= maxregno
17184 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17185 {
17186 emit_move_insn (clearing_reg, const0_rtx);
17187 emit_use (clearing_reg);
17188 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17189 }
17190
17191 for (regno = minregno; regno <= maxregno; regno++)
17192 {
17193 if (!bitmap_bit_p (to_clear_bitmap, regno))
17194 continue;
17195
17196 if (IS_VFP_REGNUM (regno))
17197 {
17198 /* If regno is an even vfp register and its successor is also to
17199 be cleared, use vmov. */
17200 if (TARGET_VFP_DOUBLE
17201 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17202 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17203 {
17204 emit_move_insn (gen_rtx_REG (DFmode, regno),
17205 CONST1_RTX (DFmode));
17206 emit_use (gen_rtx_REG (DFmode, regno));
17207 regno++;
17208 }
17209 else
17210 {
17211 emit_move_insn (gen_rtx_REG (SFmode, regno),
17212 CONST1_RTX (SFmode));
17213 emit_use (gen_rtx_REG (SFmode, regno));
17214 }
17215 }
17216 else
17217 {
17218 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17219 emit_use (gen_rtx_REG (SImode, regno));
17220 }
17221 }
17222 }
17223
17224 /* Clear caller-saved registers not used to pass arguments before a
17225 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17226 registers is done in the __gnu_cmse_nonsecure_call libcall.
17227 See libgcc/config/arm/cmse_nonsecure_call.S. */
17228
17229 static void
17230 cmse_nonsecure_call_clear_caller_saved (void)
17231 {
17232 basic_block bb;
17233
17234 FOR_EACH_BB_FN (bb, cfun)
17235 {
17236 rtx_insn *insn;
17237
17238 FOR_BB_INSNS (bb, insn)
17239 {
17240 unsigned address_regnum, regno, maxregno =
17241 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17242 auto_sbitmap to_clear_bitmap (maxregno + 1);
17243 rtx_insn *seq;
17244 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17245 rtx address;
17246 CUMULATIVE_ARGS args_so_far_v;
17247 cumulative_args_t args_so_far;
17248 tree arg_type, fntype;
17249 bool first_param = true;
17250 function_args_iterator args_iter;
17251 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17252
17253 if (!NONDEBUG_INSN_P (insn))
17254 continue;
17255
17256 if (!CALL_P (insn))
17257 continue;
17258
17259 pat = PATTERN (insn);
17260 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17261 call = XVECEXP (pat, 0, 0);
17262
17263 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17264 if (GET_CODE (call) == SET)
17265 call = SET_SRC (call);
17266
17267 /* Check if it is a cmse_nonsecure_call. */
17268 unspec = XEXP (call, 0);
17269 if (GET_CODE (unspec) != UNSPEC
17270 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17271 continue;
17272
17273 /* Determine the caller-saved registers we need to clear. */
17274 bitmap_clear (to_clear_bitmap);
17275 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17276
17277 /* Only look at the caller-saved floating point registers in case of
17278 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17279 lazy store and loads which clear both caller- and callee-saved
17280 registers. */
17281 if (TARGET_HARD_FLOAT_ABI)
17282 {
17283 auto_sbitmap float_bitmap (maxregno + 1);
17284
17285 bitmap_clear (float_bitmap);
17286 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17287 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17288 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17289 }
17290
17291 /* Make sure the register used to hold the function address is not
17292 cleared. */
17293 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17294 gcc_assert (MEM_P (address));
17295 gcc_assert (REG_P (XEXP (address, 0)));
17296 address_regnum = REGNO (XEXP (address, 0));
17297 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17298 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17299
17300 /* Set basic block of call insn so that df rescan is performed on
17301 insns inserted here. */
17302 set_block_for_insn (insn, bb);
17303 df_set_flags (DF_DEFER_INSN_RESCAN);
17304 start_sequence ();
17305
17306 /* Make sure the scheduler doesn't schedule other insns beyond
17307 here. */
17308 emit_insn (gen_blockage ());
17309
17310 /* Walk through all arguments and clear registers appropriately. */
17312 fntype = TREE_TYPE (MEM_EXPR (address));
17313 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17314 NULL_TREE);
17315 args_so_far = pack_cumulative_args (&args_so_far_v);
17316 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17317 {
17318 rtx arg_rtx;
17319 uint64_t to_clear_args_mask;
17320 machine_mode arg_mode = TYPE_MODE (arg_type);
17321
17322 if (VOID_TYPE_P (arg_type))
17323 continue;
17324
17325 if (!first_param)
17326 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17327 true);
17328
17329 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17330 true);
17331 gcc_assert (REG_P (arg_rtx));
17332 to_clear_args_mask
17333 = compute_not_to_clear_mask (arg_type, arg_rtx,
17334 REGNO (arg_rtx),
17335 &padding_bits_to_clear[0]);
17336 if (to_clear_args_mask)
17337 {
17338 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17339 {
17340 if (to_clear_args_mask & (1ULL << regno))
17341 bitmap_clear_bit (to_clear_bitmap, regno);
17342 }
17343 }
17344
17345 first_param = false;
17346 }
17347
17348 /* We use right shift and left shift to clear the LSB of the address
17349 we jump to instead of using bic, to avoid having to use an extra
17350 register on Thumb-1. */
17351 clearing_reg = XEXP (address, 0);
17352 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17353 emit_insn (gen_rtx_SET (clearing_reg, shift));
17354 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17355 emit_insn (gen_rtx_SET (clearing_reg, shift));
17356
17357 /* Clear caller-saved registers that leak before doing a non-secure
17358 call. */
17359 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17360 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17361 NUM_ARG_REGS, ip_reg, clearing_reg);
17362
17363 seq = get_insns ();
17364 end_sequence ();
17365 emit_insn_before (seq, insn);
17366 }
17367 }
17368 }
17369
17370 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17371 be useful in the next conditional jump insn. */
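/* For example (illustrative registers): when a low-register copy
   r1 := r0 is followed in the same basic block by "cmp r1, #0" and a
   conditional branch, the copy is rewritten as "subs r1, r0, #0".  The
   SUBS sets the condition codes itself, the idea being that the separate
   compare against zero then becomes redundant when the branch is output.  */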
17372
17373 static void
17374 thumb1_reorg (void)
17375 {
17376 basic_block bb;
17377
17378 FOR_EACH_BB_FN (bb, cfun)
17379 {
17380 rtx dest, src;
17381 rtx cmp, op0, op1, set = NULL;
17382 rtx_insn *prev, *insn = BB_END (bb);
17383 bool insn_clobbered = false;
17384
17385 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17386 insn = PREV_INSN (insn);
17387
17388 /* Find the last cbranchsi4_insn in basic block BB. */
17389 if (insn == BB_HEAD (bb)
17390 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17391 continue;
17392
17393 /* Get the register with which we are comparing. */
17394 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17395 op0 = XEXP (cmp, 0);
17396 op1 = XEXP (cmp, 1);
17397
17398 /* Check that comparison is against ZERO. */
17399 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17400 continue;
17401
17402 /* Find the first flag setting insn before INSN in basic block BB. */
17403 gcc_assert (insn != BB_HEAD (bb));
17404 for (prev = PREV_INSN (insn);
17405 (!insn_clobbered
17406 && prev != BB_HEAD (bb)
17407 && (NOTE_P (prev)
17408 || DEBUG_INSN_P (prev)
17409 || ((set = single_set (prev)) != NULL
17410 && get_attr_conds (prev) == CONDS_NOCOND)));
17411 prev = PREV_INSN (prev))
17412 {
17413 if (reg_set_p (op0, prev))
17414 insn_clobbered = true;
17415 }
17416
17417 /* Skip if op0 is clobbered by insn other than prev. */
17418 if (insn_clobbered)
17419 continue;
17420
17421 if (!set)
17422 continue;
17423
17424 dest = SET_DEST (set);
17425 src = SET_SRC (set);
17426 if (!low_register_operand (dest, SImode)
17427 || !low_register_operand (src, SImode))
17428 continue;
17429
17430 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17431 in INSN. Both src and dest of the move insn are checked. */
17432 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17433 {
17434 dest = copy_rtx (dest);
17435 src = copy_rtx (src);
17436 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17437 PATTERN (prev) = gen_rtx_SET (dest, src);
17438 INSN_CODE (prev) = -1;
17439 /* Set test register in INSN to dest. */
17440 XEXP (cmp, 0) = copy_rtx (dest);
17441 INSN_CODE (insn) = -1;
17442 }
17443 }
17444 }
17445
17446 /* Convert instructions to their cc-clobbering variant if possible, since
17447 that allows us to use smaller encodings. */
17448
17449 static void
17450 thumb2_reorg (void)
17451 {
17452 basic_block bb;
17453 regset_head live;
17454
17455 INIT_REG_SET (&live);
17456
17457 /* We are freeing block_for_insn in the toplev to keep compatibility
17458 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17459 compute_bb_for_insn ();
17460 df_analyze ();
17461
17462 enum Convert_Action {SKIP, CONV, SWAP_CONV};
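/* SKIP leaves the insn alone; CONV rewrites it in place to also clobber
   the condition codes so that the shorter 16-bit flag-setting encoding
   (ADDS, ANDS, LSLS, ...) can be used; SWAP_CONV additionally swaps the
   two (commutative) source operands first so that the destination matches
   one of them, as the two-operand 16-bit forms require.  */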
17463
17464 FOR_EACH_BB_FN (bb, cfun)
17465 {
17466 if ((current_tune->disparage_flag_setting_t16_encodings
17467 == tune_params::DISPARAGE_FLAGS_ALL)
17468 && optimize_bb_for_speed_p (bb))
17469 continue;
17470
17471 rtx_insn *insn;
17472 Convert_Action action = SKIP;
17473 Convert_Action action_for_partial_flag_setting
17474 = ((current_tune->disparage_flag_setting_t16_encodings
17475 != tune_params::DISPARAGE_FLAGS_NEITHER)
17476 && optimize_bb_for_speed_p (bb))
17477 ? SKIP : CONV;
17478
17479 COPY_REG_SET (&live, DF_LR_OUT (bb));
17480 df_simulate_initialize_backwards (bb, &live);
17481 FOR_BB_INSNS_REVERSE (bb, insn)
17482 {
17483 if (NONJUMP_INSN_P (insn)
17484 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17485 && GET_CODE (PATTERN (insn)) == SET)
17486 {
17487 action = SKIP;
17488 rtx pat = PATTERN (insn);
17489 rtx dst = XEXP (pat, 0);
17490 rtx src = XEXP (pat, 1);
17491 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17492
17493 if (UNARY_P (src) || BINARY_P (src))
17494 op0 = XEXP (src, 0);
17495
17496 if (BINARY_P (src))
17497 op1 = XEXP (src, 1);
17498
17499 if (low_register_operand (dst, SImode))
17500 {
17501 switch (GET_CODE (src))
17502 {
17503 case PLUS:
17504 /* Adding two registers and storing the result
17505 in the first source is already a 16-bit
17506 operation. */
17507 if (rtx_equal_p (dst, op0)
17508 && register_operand (op1, SImode))
17509 break;
17510
17511 if (low_register_operand (op0, SImode))
17512 {
17513 /* ADDS <Rd>,<Rn>,<Rm> */
17514 if (low_register_operand (op1, SImode))
17515 action = CONV;
17516 /* ADDS <Rdn>,#<imm8> */
17517 /* SUBS <Rdn>,#<imm8> */
17518 else if (rtx_equal_p (dst, op0)
17519 && CONST_INT_P (op1)
17520 && IN_RANGE (INTVAL (op1), -255, 255))
17521 action = CONV;
17522 /* ADDS <Rd>,<Rn>,#<imm3> */
17523 /* SUBS <Rd>,<Rn>,#<imm3> */
17524 else if (CONST_INT_P (op1)
17525 && IN_RANGE (INTVAL (op1), -7, 7))
17526 action = CONV;
17527 }
17528 /* ADCS <Rd>, <Rn> */
17529 else if (GET_CODE (XEXP (src, 0)) == PLUS
17530 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17531 && low_register_operand (XEXP (XEXP (src, 0), 1),
17532 SImode)
17533 && COMPARISON_P (op1)
17534 && cc_register (XEXP (op1, 0), VOIDmode)
17535 && maybe_get_arm_condition_code (op1) == ARM_CS
17536 && XEXP (op1, 1) == const0_rtx)
17537 action = CONV;
17538 break;
17539
17540 case MINUS:
17541 /* RSBS <Rd>,<Rn>,#0
17542 Not handled here: see NEG below. */
17543 /* SUBS <Rd>,<Rn>,#<imm3>
17544 SUBS <Rdn>,#<imm8>
17545 Not handled here: see PLUS above. */
17546 /* SUBS <Rd>,<Rn>,<Rm> */
17547 if (low_register_operand (op0, SImode)
17548 && low_register_operand (op1, SImode))
17549 action = CONV;
17550 break;
17551
17552 case MULT:
17553 /* MULS <Rdm>,<Rn>,<Rdm>
17554 As an exception to the rule, this is only used
17555 when optimizing for size since MULS is slow on all
17556 known implementations. We do not even want to use
17557 MULS in cold code, if optimizing for speed, so we
17558 test the global flag here. */
17559 if (!optimize_size)
17560 break;
17561 /* Fall through. */
17562 case AND:
17563 case IOR:
17564 case XOR:
17565 /* ANDS <Rdn>,<Rm> */
17566 if (rtx_equal_p (dst, op0)
17567 && low_register_operand (op1, SImode))
17568 action = action_for_partial_flag_setting;
17569 else if (rtx_equal_p (dst, op1)
17570 && low_register_operand (op0, SImode))
17571 action = action_for_partial_flag_setting == SKIP
17572 ? SKIP : SWAP_CONV;
17573 break;
17574
17575 case ASHIFTRT:
17576 case ASHIFT:
17577 case LSHIFTRT:
17578 /* ASRS <Rdn>,<Rm> */
17579 /* LSRS <Rdn>,<Rm> */
17580 /* LSLS <Rdn>,<Rm> */
17581 if (rtx_equal_p (dst, op0)
17582 && low_register_operand (op1, SImode))
17583 action = action_for_partial_flag_setting;
17584 /* ASRS <Rd>,<Rm>,#<imm5> */
17585 /* LSRS <Rd>,<Rm>,#<imm5> */
17586 /* LSLS <Rd>,<Rm>,#<imm5> */
17587 else if (low_register_operand (op0, SImode)
17588 && CONST_INT_P (op1)
17589 && IN_RANGE (INTVAL (op1), 0, 31))
17590 action = action_for_partial_flag_setting;
17591 break;
17592
17593 case ROTATERT:
17594 /* RORS <Rdn>,<Rm> */
17595 if (rtx_equal_p (dst, op0)
17596 && low_register_operand (op1, SImode))
17597 action = action_for_partial_flag_setting;
17598 break;
17599
17600 case NOT:
17601 /* MVNS <Rd>,<Rm> */
17602 if (low_register_operand (op0, SImode))
17603 action = action_for_partial_flag_setting;
17604 break;
17605
17606 case NEG:
17607 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17608 if (low_register_operand (op0, SImode))
17609 action = CONV;
17610 break;
17611
17612 case CONST_INT:
17613 /* MOVS <Rd>,#<imm8> */
17614 if (CONST_INT_P (src)
17615 && IN_RANGE (INTVAL (src), 0, 255))
17616 action = action_for_partial_flag_setting;
17617 break;
17618
17619 case REG:
17620 /* MOVS and MOV<c> with registers have different
17621 encodings, so are not relevant here. */
17622 break;
17623
17624 default:
17625 break;
17626 }
17627 }
17628
17629 if (action != SKIP)
17630 {
17631 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17632 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17633 rtvec vec;
17634
17635 if (action == SWAP_CONV)
17636 {
17637 src = copy_rtx (src);
17638 XEXP (src, 0) = op1;
17639 XEXP (src, 1) = op0;
17640 pat = gen_rtx_SET (dst, src);
17641 vec = gen_rtvec (2, pat, clobber);
17642 }
17643 else /* action == CONV */
17644 vec = gen_rtvec (2, pat, clobber);
17645
17646 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17647 INSN_CODE (insn) = -1;
17648 }
17649 }
17650
17651 if (NONDEBUG_INSN_P (insn))
17652 df_simulate_one_insn_backwards (bb, insn, &live);
17653 }
17654 }
17655
17656 CLEAR_REG_SET (&live);
17657 }
17658
17659 /* GCC puts the pool in the wrong place for ARM, since we can only
17660 load addresses a limited distance around the pc. We do some
17661 special munging to move the constant pool values to the correct
17662 point in the code. */
17663 static void
17664 arm_reorg (void)
17665 {
17666 rtx_insn *insn;
17667 HOST_WIDE_INT address = 0;
17668 Mfix * fix;
17669
17670 if (use_cmse)
17671 cmse_nonsecure_call_clear_caller_saved ();
17672 if (TARGET_THUMB1)
17673 thumb1_reorg ();
17674 else if (TARGET_THUMB2)
17675 thumb2_reorg ();
17676
17677 /* Ensure all insns that must be split have been split at this point.
17678 Otherwise, the pool placement code below may compute incorrect
17679 insn lengths. Note that when optimizing, all insns have already
17680 been split at this point. */
17681 if (!optimize)
17682 split_all_insns_noflow ();
17683
17684 /* Make sure we do not attempt to create a literal pool even though it should
17685 no longer be necessary to create any. */
17686 if (arm_disable_literal_pool)
17687 return;
17688
17689 minipool_fix_head = minipool_fix_tail = NULL;
17690
17691 /* The first insn must always be a note, or the code below won't
17692 scan it properly. */
17693 insn = get_insns ();
17694 gcc_assert (NOTE_P (insn));
17695 minipool_pad = 0;
17696
17697 /* Scan all the insns and record the operands that will need fixing. */
17698 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17699 {
17700 if (BARRIER_P (insn))
17701 push_minipool_barrier (insn, address);
17702 else if (INSN_P (insn))
17703 {
17704 rtx_jump_table_data *table;
17705
17706 note_invalid_constants (insn, address, true);
17707 address += get_attr_length (insn);
17708
17709 /* If the insn is a vector jump, add the size of the table
17710 and skip the table. */
17711 if (tablejump_p (insn, NULL, &table))
17712 {
17713 address += get_jump_table_size (table);
17714 insn = table;
17715 }
17716 }
17717 else if (LABEL_P (insn))
17718 /* Add the worst-case padding due to alignment. We don't add
17719 the _current_ padding because the minipool insertions
17720 themselves might change it. */
17721 address += get_label_padding (insn);
17722 }
17723
17724 fix = minipool_fix_head;
17725
17726 /* Now scan the fixups and perform the required changes. */
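/* The loop below works pool by pool: gather forward references until one
   no longer fits (or a barrier beyond the pool's reach is seen), place the
   pool at the last natural barrier found or create one with
   create_fix_barrier, assign offsets, opportunistically add the following
   fixes as backward references to the same pool, and finally rewrite each
   fix to load from the pool and emit the pool itself after the barrier.  */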
17727 while (fix)
17728 {
17729 Mfix * ftmp;
17730 Mfix * fdel;
17731 Mfix * last_added_fix;
17732 Mfix * last_barrier = NULL;
17733 Mfix * this_fix;
17734
17735 /* Skip any further barriers before the next fix. */
17736 while (fix && BARRIER_P (fix->insn))
17737 fix = fix->next;
17738
17739 /* No more fixes. */
17740 if (fix == NULL)
17741 break;
17742
17743 last_added_fix = NULL;
17744
17745 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17746 {
17747 if (BARRIER_P (ftmp->insn))
17748 {
17749 if (ftmp->address >= minipool_vector_head->max_address)
17750 break;
17751
17752 last_barrier = ftmp;
17753 }
17754 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17755 break;
17756
17757 last_added_fix = ftmp; /* Keep track of the last fix added. */
17758 }
17759
17760 /* If we found a barrier, drop back to that; any fixes that we
17761 could have reached but come after the barrier will now go in
17762 the next mini-pool. */
17763 if (last_barrier != NULL)
17764 {
17765 /* Reduce the refcount for those fixes that won't go into this
17766 pool after all. */
17767 for (fdel = last_barrier->next;
17768 fdel && fdel != ftmp;
17769 fdel = fdel->next)
17770 {
17771 fdel->minipool->refcount--;
17772 fdel->minipool = NULL;
17773 }
17774
17775 ftmp = last_barrier;
17776 }
17777 else
17778 {
17779 /* ftmp is the first fix that we can't fit into this pool and
17780 there are no natural barriers that we could use. Insert a
17781 new barrier in the code somewhere between the previous
17782 fix and this one, and arrange to jump around it. */
17783 HOST_WIDE_INT max_address;
17784
17785 /* The last item on the list of fixes must be a barrier, so
17786 we can never run off the end of the list of fixes without
17787 last_barrier being set. */
17788 gcc_assert (ftmp);
17789
17790 max_address = minipool_vector_head->max_address;
17791 /* Check that there isn't another fix that is in range that
17792 we couldn't fit into this pool because the pool was
17793 already too large: we need to put the pool before such an
17794 instruction. The pool itself may come just after the
17795 fix because create_fix_barrier also allows space for a
17796 jump instruction. */
17797 if (ftmp->address < max_address)
17798 max_address = ftmp->address + 1;
17799
17800 last_barrier = create_fix_barrier (last_added_fix, max_address);
17801 }
17802
17803 assign_minipool_offsets (last_barrier);
17804
17805 while (ftmp)
17806 {
17807 if (!BARRIER_P (ftmp->insn)
17808 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17809 == NULL))
17810 break;
17811
17812 ftmp = ftmp->next;
17813 }
17814
17815 /* Scan over the fixes we have identified for this pool, fixing them
17816 up and adding the constants to the pool itself. */
17817 for (this_fix = fix; this_fix && ftmp != this_fix;
17818 this_fix = this_fix->next)
17819 if (!BARRIER_P (this_fix->insn))
17820 {
17821 rtx addr
17822 = plus_constant (Pmode,
17823 gen_rtx_LABEL_REF (VOIDmode,
17824 minipool_vector_label),
17825 this_fix->minipool->offset);
17826 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17827 }
17828
17829 dump_minipool (last_barrier->insn);
17830 fix = ftmp;
17831 }
17832
17833 /* From now on we must synthesize any constants that we can't handle
17834 directly. This can happen if the RTL gets split during final
17835 instruction generation. */
17836 cfun->machine->after_arm_reorg = 1;
17837
17838 /* Free the minipool memory. */
17839 obstack_free (&minipool_obstack, minipool_startobj);
17840 }
17841 \f
17842 /* Routines to output assembly language. */
17843
17844 /* Return string representation of passed in real value. */
17845 static const char *
17846 fp_const_from_val (REAL_VALUE_TYPE *r)
17847 {
17848 if (!fp_consts_inited)
17849 init_fp_table ();
17850
17851 gcc_assert (real_equal (r, &value_fp0));
17852 return "0";
17853 }
17854
17855 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17856 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17857 insn is in the list, and UPDATE is true iff the list contains an
17858 explicit update of the base register. */
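/* As a rough sketch of the output (conditions from the %? / %d0 directives
   omitted): popping {r4, r5, r6} with an SP base and writeback comes out as
   "pop {r4, r5, r6}", while the same list with a non-SP base r7 and
   writeback comes out as "ldmia r7!, {r4, r5, r6}".  */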
17859 void
17860 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17861 bool update)
17862 {
17863 int i;
17864 char pattern[100];
17865 int offset;
17866 const char *conditional;
17867 int num_saves = XVECLEN (operands[0], 0);
17868 unsigned int regno;
17869 unsigned int regno_base = REGNO (operands[1]);
17870 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17871
17872 offset = 0;
17873 offset += update ? 1 : 0;
17874 offset += return_pc ? 1 : 0;
17875
17876 /* Is the base register in the list? */
17877 for (i = offset; i < num_saves; i++)
17878 {
17879 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17880 /* If SP is in the list, then the base register must be SP. */
17881 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17882 /* If base register is in the list, there must be no explicit update. */
17883 if (regno == regno_base)
17884 gcc_assert (!update);
17885 }
17886
17887 conditional = reverse ? "%?%D0" : "%?%d0";
17888 /* Can't use POP if returning from an interrupt. */
17889 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17890 sprintf (pattern, "pop%s\t{", conditional);
17891 else
17892 {
17893 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17894 It's just a convention; their semantics are identical. */
17895 if (regno_base == SP_REGNUM)
17896 sprintf (pattern, "ldmfd%s\t", conditional);
17897 else if (update)
17898 sprintf (pattern, "ldmia%s\t", conditional);
17899 else
17900 sprintf (pattern, "ldm%s\t", conditional);
17901
17902 strcat (pattern, reg_names[regno_base]);
17903 if (update)
17904 strcat (pattern, "!, {");
17905 else
17906 strcat (pattern, ", {");
17907 }
17908
17909 /* Output the first destination register. */
17910 strcat (pattern,
17911 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17912
17913 /* Output the rest of the destination registers. */
17914 for (i = offset + 1; i < num_saves; i++)
17915 {
17916 strcat (pattern, ", ");
17917 strcat (pattern,
17918 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17919 }
17920
17921 strcat (pattern, "}");
17922
17923 if (interrupt_p && return_pc)
17924 strcat (pattern, "^");
17925
17926 output_asm_insn (pattern, &cond);
17927 }
17928
17929
17930 /* Output the assembly for a store multiple. */
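/* A sketch of the output, assuming three consecutive double registers
   starting at d8: a store through the stack pointer is emitted as
   "vpush.64 {d8, d9, d10}", while a store through another base register rN
   with writeback is emitted as "vstmdb.64 rN!, {d8, d9, d10}".  */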
17931
17932 const char *
17933 vfp_output_vstmd (rtx * operands)
17934 {
17935 char pattern[100];
17936 int p;
17937 int base;
17938 int i;
17939 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17940 ? XEXP (operands[0], 0)
17941 : XEXP (XEXP (operands[0], 0), 0);
17942 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17943
17944 if (push_p)
17945 strcpy (pattern, "vpush%?.64\t{%P1");
17946 else
17947 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17948
17949 p = strlen (pattern);
17950
17951 gcc_assert (REG_P (operands[1]));
17952
17953 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17954 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17955 {
17956 p += sprintf (&pattern[p], ", d%d", base + i);
17957 }
17958 strcpy (&pattern[p], "}");
17959
17960 output_asm_insn (pattern, operands);
17961 return "";
17962 }
17963
17964
17965 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17966 number of bytes pushed. */
17967
17968 static int
17969 vfp_emit_fstmd (int base_reg, int count)
17970 {
17971 rtx par;
17972 rtx dwarf;
17973 rtx tmp, reg;
17974 int i;
17975
17976 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17977 register pairs are stored by a store multiple insn. We avoid this
17978 by pushing an extra pair. */
17979 if (count == 2 && !arm_arch6)
17980 {
17981 if (base_reg == LAST_VFP_REGNUM - 3)
17982 base_reg -= 2;
17983 count++;
17984 }
17985
17986 /* FSTMD may not store more than 16 doubleword registers at once. Split
17987 larger stores into multiple parts (up to a maximum of two, in
17988 practice). */
17989 if (count > 16)
17990 {
17991 int saved;
17992 /* NOTE: base_reg is an internal register number, so each D register
17993 counts as 2. */
17994 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17995 saved += vfp_emit_fstmd (base_reg, 16);
17996 return saved;
17997 }
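/* Illustrative example of the split above: a request to store 20 double
   registers is emitted as one FSTMD of the top 4 registers followed by one
   FSTMD of the remaining 16, since base_reg + 32 names the D register 16
   above base_reg in the internal numbering.  */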
17998
17999 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18000 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18001
18002 reg = gen_rtx_REG (DFmode, base_reg);
18003 base_reg += 2;
18004
18005 XVECEXP (par, 0, 0)
18006 = gen_rtx_SET (gen_frame_mem
18007 (BLKmode,
18008 gen_rtx_PRE_MODIFY (Pmode,
18009 stack_pointer_rtx,
18010 plus_constant
18011 (Pmode, stack_pointer_rtx,
18012 - (count * 8)))
18013 ),
18014 gen_rtx_UNSPEC (BLKmode,
18015 gen_rtvec (1, reg),
18016 UNSPEC_PUSH_MULT));
18017
18018 tmp = gen_rtx_SET (stack_pointer_rtx,
18019 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18020 RTX_FRAME_RELATED_P (tmp) = 1;
18021 XVECEXP (dwarf, 0, 0) = tmp;
18022
18023 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18024 RTX_FRAME_RELATED_P (tmp) = 1;
18025 XVECEXP (dwarf, 0, 1) = tmp;
18026
18027 for (i = 1; i < count; i++)
18028 {
18029 reg = gen_rtx_REG (DFmode, base_reg);
18030 base_reg += 2;
18031 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18032
18033 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18034 plus_constant (Pmode,
18035 stack_pointer_rtx,
18036 i * 8)),
18037 reg);
18038 RTX_FRAME_RELATED_P (tmp) = 1;
18039 XVECEXP (dwarf, 0, i + 1) = tmp;
18040 }
18041
18042 par = emit_insn (par);
18043 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18044 RTX_FRAME_RELATED_P (par) = 1;
18045
18046 return count * 8;
18047 }
18048
18049 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
18050 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
18051
18052 bool
18053 detect_cmse_nonsecure_call (tree addr)
18054 {
18055 if (!addr)
18056 return FALSE;
18057
18058 tree fntype = TREE_TYPE (addr);
18059 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18060 TYPE_ATTRIBUTES (fntype)))
18061 return TRUE;
18062 return FALSE;
18063 }
18064
18065
18066 /* Emit a call instruction with pattern PAT. ADDR is the address of
18067 the call target. */
18068
18069 void
18070 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18071 {
18072 rtx insn;
18073
18074 insn = emit_call_insn (pat);
18075
18076 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18077 If the call might use such an entry, add a use of the PIC register
18078 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18079 if (TARGET_VXWORKS_RTP
18080 && flag_pic
18081 && !sibcall
18082 && GET_CODE (addr) == SYMBOL_REF
18083 && (SYMBOL_REF_DECL (addr)
18084 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18085 : !SYMBOL_REF_LOCAL_P (addr)))
18086 {
18087 require_pic_register ();
18088 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18089 }
18090
18091 if (TARGET_AAPCS_BASED)
18092 {
18093 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18094 linker. We need to add an IP clobber to allow setting
18095 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18096 is not needed since it's a fixed register. */
18097 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18098 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18099 }
18100 }
18101
18102 /* Output a 'call' insn. */
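/* A sketch of the pre-ARMv5 sequence this emits for an indirect call
   through, say, r2: "mov lr, pc" followed by "bx r2" on interworking or
   ARMv4T targets, and by "mov pc, r2" otherwise; a call through lr is
   first moved into ip.  */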
18103 const char *
18104 output_call (rtx *operands)
18105 {
18106 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18107
18108 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18109 if (REGNO (operands[0]) == LR_REGNUM)
18110 {
18111 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18112 output_asm_insn ("mov%?\t%0, %|lr", operands);
18113 }
18114
18115 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18116
18117 if (TARGET_INTERWORK || arm_arch4t)
18118 output_asm_insn ("bx%?\t%0", operands);
18119 else
18120 output_asm_insn ("mov%?\t%|pc, %0", operands);
18121
18122 return "";
18123 }
18124
18125 /* Output a move of a long double from ARM registers to ARM registers.
18126 OPERANDS[0] is the destination.
18127 OPERANDS[1] is the source. */
18128 const char *
18129 output_mov_long_double_arm_from_arm (rtx *operands)
18130 {
18131 /* We have to be careful here because the two might overlap. */
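/* For example, copying {r2, r3, r4} into {r3, r4, r5} must move the
   highest word first (and the opposite direction copies lowest-first) so
   that no source register is overwritten before it has been read.  */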
18132 int dest_start = REGNO (operands[0]);
18133 int src_start = REGNO (operands[1]);
18134 rtx ops[2];
18135 int i;
18136
18137 if (dest_start < src_start)
18138 {
18139 for (i = 0; i < 3; i++)
18140 {
18141 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18142 ops[1] = gen_rtx_REG (SImode, src_start + i);
18143 output_asm_insn ("mov%?\t%0, %1", ops);
18144 }
18145 }
18146 else
18147 {
18148 for (i = 2; i >= 0; i--)
18149 {
18150 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18151 ops[1] = gen_rtx_REG (SImode, src_start + i);
18152 output_asm_insn ("mov%?\t%0, %1", ops);
18153 }
18154 }
18155
18156 return "";
18157 }
18158
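/* Emit the (at most two) SETs needed to load SRC into DEST.  A constant
   source is split into its low and high 16-bit halves, roughly a movw/movt
   style sequence: for example, loading 0x12345678 first sets the low half
   to 0x5678 and then writes 0x1234 into bits 16-31 through a ZERO_EXTRACT.
   Other sources use a HIGH/LO_SUM pair; when two sets are emitted, a
   REG_EQUAL note recording the full source is attached to the last one.  */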
18159 void
18160 arm_emit_movpair (rtx dest, rtx src)
18161 {
18162 /* If the src is an immediate, simplify it. */
18163 if (CONST_INT_P (src))
18164 {
18165 HOST_WIDE_INT val = INTVAL (src);
18166 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18167 if ((val >> 16) & 0x0000ffff)
18168 {
18169 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18170 GEN_INT (16)),
18171 GEN_INT ((val >> 16) & 0x0000ffff));
18172 rtx_insn *insn = get_last_insn ();
18173 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18174 }
18175 return;
18176 }
18177 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18178 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18179 rtx_insn *insn = get_last_insn ();
18180 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18181 }
18182
18183 /* Output a move between double words. It must be REG<-MEM
18184 or MEM<-REG. */
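/* A sketch of typical output for the REG<-MEM direction with a plain
   register address: "ldrd r0, [r2]" when LDRD is usable, or an "ldmia" of
   the two destination registers otherwise; the cases below handle
   auto-increment addresses, label references and reg+offset sums.  */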
18185 const char *
18186 output_move_double (rtx *operands, bool emit, int *count)
18187 {
18188 enum rtx_code code0 = GET_CODE (operands[0]);
18189 enum rtx_code code1 = GET_CODE (operands[1]);
18190 rtx otherops[3];
18191 if (count)
18192 *count = 1;
18193
18194 /* The only case when this might happen is when
18195 you are looking at the length of a DImode instruction
18196 that has an invalid constant in it. */
18197 if (code0 == REG && code1 != MEM)
18198 {
18199 gcc_assert (!emit);
18200 *count = 2;
18201 return "";
18202 }
18203
18204 if (code0 == REG)
18205 {
18206 unsigned int reg0 = REGNO (operands[0]);
18207
18208 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18209
18210 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18211
18212 switch (GET_CODE (XEXP (operands[1], 0)))
18213 {
18214 case REG:
18215
18216 if (emit)
18217 {
18218 if (TARGET_LDRD
18219 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18220 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18221 else
18222 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18223 }
18224 break;
18225
18226 case PRE_INC:
18227 gcc_assert (TARGET_LDRD);
18228 if (emit)
18229 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18230 break;
18231
18232 case PRE_DEC:
18233 if (emit)
18234 {
18235 if (TARGET_LDRD)
18236 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18237 else
18238 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18239 }
18240 break;
18241
18242 case POST_INC:
18243 if (emit)
18244 {
18245 if (TARGET_LDRD)
18246 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18247 else
18248 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18249 }
18250 break;
18251
18252 case POST_DEC:
18253 gcc_assert (TARGET_LDRD);
18254 if (emit)
18255 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18256 break;
18257
18258 case PRE_MODIFY:
18259 case POST_MODIFY:
18260 /* Autoincrement addressing modes should never have overlapping
18261 base and destination registers, and overlapping index registers
18262 are already prohibited, so this doesn't need to worry about
18263 fix_cm3_ldrd. */
18264 otherops[0] = operands[0];
18265 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18266 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18267
18268 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18269 {
18270 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18271 {
18272 /* Registers overlap so split out the increment. */
18273 if (emit)
18274 {
18275 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18276 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18277 }
18278 if (count)
18279 *count = 2;
18280 }
18281 else
18282 {
18283 /* Use a single insn if we can.
18284 FIXME: IWMMXT allows offsets larger than ldrd can
18285 handle; fix these up with a pair of ldr. */
18286 if (TARGET_THUMB2
18287 || !CONST_INT_P (otherops[2])
18288 || (INTVAL (otherops[2]) > -256
18289 && INTVAL (otherops[2]) < 256))
18290 {
18291 if (emit)
18292 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18293 }
18294 else
18295 {
18296 if (emit)
18297 {
18298 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18299 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18300 }
18301 if (count)
18302 *count = 2;
18303
18304 }
18305 }
18306 }
18307 else
18308 {
18309 /* Use a single insn if we can.
18310 FIXME: IWMMXT allows offsets larger than ldrd can handle;
18311 fix these up with a pair of ldr. */
18312 if (TARGET_THUMB2
18313 || !CONST_INT_P (otherops[2])
18314 || (INTVAL (otherops[2]) > -256
18315 && INTVAL (otherops[2]) < 256))
18316 {
18317 if (emit)
18318 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18319 }
18320 else
18321 {
18322 if (emit)
18323 {
18324 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18325 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18326 }
18327 if (count)
18328 *count = 2;
18329 }
18330 }
18331 break;
18332
18333 case LABEL_REF:
18334 case CONST:
18335 /* We might be able to use ldrd %0, %1 here. However the range is
18336 different to ldr/adr, and it is broken on some ARMv7-M
18337 implementations. */
18338 /* Use the second register of the pair to avoid problematic
18339 overlap. */
18340 otherops[1] = operands[1];
18341 if (emit)
18342 output_asm_insn ("adr%?\t%0, %1", otherops);
18343 operands[1] = otherops[0];
18344 if (emit)
18345 {
18346 if (TARGET_LDRD)
18347 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18348 else
18349 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18350 }
18351
18352 if (count)
18353 *count = 2;
18354 break;
18355
18356 /* ??? This needs checking for thumb2. */
18357 default:
18358 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18359 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18360 {
18361 otherops[0] = operands[0];
18362 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18363 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18364
18365 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18366 {
18367 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18368 {
18369 switch ((int) INTVAL (otherops[2]))
18370 {
18371 case -8:
18372 if (emit)
18373 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18374 return "";
18375 case -4:
18376 if (TARGET_THUMB2)
18377 break;
18378 if (emit)
18379 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18380 return "";
18381 case 4:
18382 if (TARGET_THUMB2)
18383 break;
18384 if (emit)
18385 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18386 return "";
18387 }
18388 }
18389 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18390 operands[1] = otherops[0];
18391 if (TARGET_LDRD
18392 && (REG_P (otherops[2])
18393 || TARGET_THUMB2
18394 || (CONST_INT_P (otherops[2])
18395 && INTVAL (otherops[2]) > -256
18396 && INTVAL (otherops[2]) < 256)))
18397 {
18398 if (reg_overlap_mentioned_p (operands[0],
18399 otherops[2]))
18400 {
18401 /* Swap base and index registers over to
18402 avoid a conflict. */
18403 std::swap (otherops[1], otherops[2]);
18404 }
18405 /* If both registers conflict, it will usually
18406 have been fixed by a splitter. */
18407 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18408 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18409 {
18410 if (emit)
18411 {
18412 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18413 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18414 }
18415 if (count)
18416 *count = 2;
18417 }
18418 else
18419 {
18420 otherops[0] = operands[0];
18421 if (emit)
18422 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18423 }
18424 return "";
18425 }
18426
18427 if (CONST_INT_P (otherops[2]))
18428 {
18429 if (emit)
18430 {
18431 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18432 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18433 else
18434 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18435 }
18436 }
18437 else
18438 {
18439 if (emit)
18440 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18441 }
18442 }
18443 else
18444 {
18445 if (emit)
18446 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18447 }
18448
18449 if (count)
18450 *count = 2;
18451
18452 if (TARGET_LDRD)
18453 return "ldrd%?\t%0, [%1]";
18454
18455 return "ldmia%?\t%1, %M0";
18456 }
18457 else
18458 {
18459 otherops[1] = adjust_address (operands[1], SImode, 4);
18460 /* Take care of overlapping base/data reg. */
18461 if (reg_mentioned_p (operands[0], operands[1]))
18462 {
18463 if (emit)
18464 {
18465 output_asm_insn ("ldr%?\t%0, %1", otherops);
18466 output_asm_insn ("ldr%?\t%0, %1", operands);
18467 }
18468 if (count)
18469 *count = 2;
18470
18471 }
18472 else
18473 {
18474 if (emit)
18475 {
18476 output_asm_insn ("ldr%?\t%0, %1", operands);
18477 output_asm_insn ("ldr%?\t%0, %1", otherops);
18478 }
18479 if (count)
18480 *count = 2;
18481 }
18482 }
18483 }
18484 }
18485 else
18486 {
18487 /* Constraints should ensure this. */
18488 gcc_assert (code0 == MEM && code1 == REG);
18489 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18490 || (TARGET_ARM && TARGET_LDRD));
18491
18492 switch (GET_CODE (XEXP (operands[0], 0)))
18493 {
18494 case REG:
18495 if (emit)
18496 {
18497 if (TARGET_LDRD)
18498 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18499 else
18500 output_asm_insn ("stm%?\t%m0, %M1", operands);
18501 }
18502 break;
18503
18504 case PRE_INC:
18505 gcc_assert (TARGET_LDRD);
18506 if (emit)
18507 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18508 break;
18509
18510 case PRE_DEC:
18511 if (emit)
18512 {
18513 if (TARGET_LDRD)
18514 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18515 else
18516 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18517 }
18518 break;
18519
18520 case POST_INC:
18521 if (emit)
18522 {
18523 if (TARGET_LDRD)
18524 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18525 else
18526 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18527 }
18528 break;
18529
18530 case POST_DEC:
18531 gcc_assert (TARGET_LDRD);
18532 if (emit)
18533 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18534 break;
18535
18536 case PRE_MODIFY:
18537 case POST_MODIFY:
18538 otherops[0] = operands[1];
18539 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18540 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18541
18542 /* IWMMXT allows offsets larger than ldrd can handle;
18543 fix these up with a pair of ldr. */
18544 if (!TARGET_THUMB2
18545 && CONST_INT_P (otherops[2])
18546 && (INTVAL(otherops[2]) <= -256
18547 || INTVAL(otherops[2]) >= 256))
18548 {
18549 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18550 {
18551 if (emit)
18552 {
18553 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18554 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18555 }
18556 if (count)
18557 *count = 2;
18558 }
18559 else
18560 {
18561 if (emit)
18562 {
18563 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18564 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18565 }
18566 if (count)
18567 *count = 2;
18568 }
18569 }
18570 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18571 {
18572 if (emit)
18573 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18574 }
18575 else
18576 {
18577 if (emit)
18578 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18579 }
18580 break;
18581
18582 case PLUS:
18583 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18584 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18585 {
18586 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18587 {
18588 case -8:
18589 if (emit)
18590 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18591 return "";
18592
18593 case -4:
18594 if (TARGET_THUMB2)
18595 break;
18596 if (emit)
18597 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18598 return "";
18599
18600 case 4:
18601 if (TARGET_THUMB2)
18602 break;
18603 if (emit)
18604 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18605 return "";
18606 }
18607 }
18608 if (TARGET_LDRD
18609 && (REG_P (otherops[2])
18610 || TARGET_THUMB2
18611 || (CONST_INT_P (otherops[2])
18612 && INTVAL (otherops[2]) > -256
18613 && INTVAL (otherops[2]) < 256)))
18614 {
18615 otherops[0] = operands[1];
18616 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18617 if (emit)
18618 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18619 return "";
18620 }
18621 /* Fall through */
18622
18623 default:
18624 otherops[0] = adjust_address (operands[0], SImode, 4);
18625 otherops[1] = operands[1];
18626 if (emit)
18627 {
18628 output_asm_insn ("str%?\t%1, %0", operands);
18629 output_asm_insn ("str%?\t%H1, %0", otherops);
18630 }
18631 if (count)
18632 *count = 2;
18633 }
18634 }
18635
18636 return "";
18637 }
18638
18639 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18640 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18641
18642 const char *
18643 output_move_quad (rtx *operands)
18644 {
18645 if (REG_P (operands[0]))
18646 {
18647 /* Load, or reg->reg move. */
18648
18649 if (MEM_P (operands[1]))
18650 {
18651 switch (GET_CODE (XEXP (operands[1], 0)))
18652 {
18653 case REG:
18654 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18655 break;
18656
18657 case LABEL_REF:
18658 case CONST:
18659 output_asm_insn ("adr%?\t%0, %1", operands);
18660 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18661 break;
18662
18663 default:
18664 gcc_unreachable ();
18665 }
18666 }
18667 else
18668 {
18669 rtx ops[2];
18670 int dest, src, i;
18671
18672 gcc_assert (REG_P (operands[1]));
18673
18674 dest = REGNO (operands[0]);
18675 src = REGNO (operands[1]);
18676
18677 /* This seems pretty dumb, but hopefully GCC won't try to do it
18678 very often. */
18679 if (dest < src)
18680 for (i = 0; i < 4; i++)
18681 {
18682 ops[0] = gen_rtx_REG (SImode, dest + i);
18683 ops[1] = gen_rtx_REG (SImode, src + i);
18684 output_asm_insn ("mov%?\t%0, %1", ops);
18685 }
18686 else
18687 for (i = 3; i >= 0; i--)
18688 {
18689 ops[0] = gen_rtx_REG (SImode, dest + i);
18690 ops[1] = gen_rtx_REG (SImode, src + i);
18691 output_asm_insn ("mov%?\t%0, %1", ops);
18692 }
18693 }
18694 }
18695 else
18696 {
18697 gcc_assert (MEM_P (operands[0]));
18698 gcc_assert (REG_P (operands[1]));
18699 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18700
18701 switch (GET_CODE (XEXP (operands[0], 0)))
18702 {
18703 case REG:
18704 output_asm_insn ("stm%?\t%m0, %M1", operands);
18705 break;
18706
18707 default:
18708 gcc_unreachable ();
18709 }
18710 }
18711
18712 return "";
18713 }
18714
18715 /* Output a VFP load or store instruction. */
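/* For instance (a sketch): a DFmode load from a plain register address is
   emitted as "vldr.64 dN, [rM]", a single-precision store as
   "vstr.32 sN, [rM]", and the PRE_DEC / POST_INC cases use the
   vstmdb / vldmia forms with writeback on the base register.  */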
18716
18717 const char *
18718 output_move_vfp (rtx *operands)
18719 {
18720 rtx reg, mem, addr, ops[2];
18721 int load = REG_P (operands[0]);
18722 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18723 int sp = (!TARGET_VFP_FP16INST
18724 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18725 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18726 const char *templ;
18727 char buff[50];
18728 machine_mode mode;
18729
18730 reg = operands[!load];
18731 mem = operands[load];
18732
18733 mode = GET_MODE (reg);
18734
18735 gcc_assert (REG_P (reg));
18736 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18737 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18738 || mode == SFmode
18739 || mode == DFmode
18740 || mode == HImode
18741 || mode == SImode
18742 || mode == DImode
18743 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18744 gcc_assert (MEM_P (mem));
18745
18746 addr = XEXP (mem, 0);
18747
18748 switch (GET_CODE (addr))
18749 {
18750 case PRE_DEC:
18751 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18752 ops[0] = XEXP (addr, 0);
18753 ops[1] = reg;
18754 break;
18755
18756 case POST_INC:
18757 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18758 ops[0] = XEXP (addr, 0);
18759 ops[1] = reg;
18760 break;
18761
18762 default:
18763 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18764 ops[0] = reg;
18765 ops[1] = mem;
18766 break;
18767 }
18768
18769 sprintf (buff, templ,
18770 load ? "ld" : "st",
18771 dp ? "64" : sp ? "32" : "16",
18772 dp ? "P" : "",
18773 integer_p ? "\t%@ int" : "");
18774 output_asm_insn (buff, ops);
18775
18776 return "";
18777 }
18778
18779 /* Output a Neon double-word or quad-word load or store, or a load
18780 or store for larger structure modes.
18781
18782 WARNING: The ordering of elements is weird in big-endian mode,
18783 because the EABI requires that vectors stored in memory appear
18784 as though they were stored by a VSTM instruction.
18785 GCC RTL defines element ordering based on in-memory order.
18786 This can be different from the architectural ordering of elements
18787 within a NEON register. The intrinsics defined in arm_neon.h use the
18788 NEON register element ordering, not the GCC RTL element ordering.
18789
18790 For example, the in-memory ordering of a big-endian quadword
18791 vector with 16-bit elements when stored from register pair {d0,d1}
18792 will be (lowest address first, d0[N] is NEON register element N):
18793
18794 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18795
18796 When necessary, quadword registers (dN, dN+1) are moved to ARM
18797 registers starting at rN, in the order:
18798
18799 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18800
18801 This is so that STM/LDM can be used on vectors in ARM registers, and
18802 the same memory layout will result as if VSTM/VLDM were used.
18803
18804 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18805 possible, which allows use of appropriate alignment tags.
18806 Note that the choice of "64" is independent of the actual vector
18807 element size; this size simply ensures that the behavior is
18808 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18809
18810 Due to limitations of those instructions, use of VST1.64/VLD1.64
18811 is not possible if:
18812 - the address contains PRE_DEC, or
18813 - the mode refers to more than 4 double-word registers
18814
18815 In those cases, it would be possible to replace VSTM/VLDM by a
18816 sequence of instructions; this is not currently implemented since
18817 this is not certain to actually improve performance. */
18818
18819 const char *
18820 output_move_neon (rtx *operands)
18821 {
18822 rtx reg, mem, addr, ops[2];
18823 int regno, nregs, load = REG_P (operands[0]);
18824 const char *templ;
18825 char buff[50];
18826 machine_mode mode;
18827
18828 reg = operands[!load];
18829 mem = operands[load];
18830
18831 mode = GET_MODE (reg);
18832
18833 gcc_assert (REG_P (reg));
18834 regno = REGNO (reg);
18835 nregs = REG_NREGS (reg) / 2;
18836 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18837 || NEON_REGNO_OK_FOR_QUAD (regno));
18838 gcc_assert (VALID_NEON_DREG_MODE (mode)
18839 || VALID_NEON_QREG_MODE (mode)
18840 || VALID_NEON_STRUCT_MODE (mode));
18841 gcc_assert (MEM_P (mem));
18842
18843 addr = XEXP (mem, 0);
18844
18845 /* Strip off const from addresses like (const (plus (...))). */
18846 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18847 addr = XEXP (addr, 0);
18848
18849 switch (GET_CODE (addr))
18850 {
18851 case POST_INC:
18852 /* We have to use vldm / vstm for too-large modes. */
18853 if (nregs > 4)
18854 {
18855 templ = "v%smia%%?\t%%0!, %%h1";
18856 ops[0] = XEXP (addr, 0);
18857 }
18858 else
18859 {
18860 templ = "v%s1.64\t%%h1, %%A0";
18861 ops[0] = mem;
18862 }
18863 ops[1] = reg;
18864 break;
18865
18866 case PRE_DEC:
18867 /* We have to use vldm / vstm in this case, since there is no
18868 pre-decrement form of the vld1 / vst1 instructions. */
18869 templ = "v%smdb%%?\t%%0!, %%h1";
18870 ops[0] = XEXP (addr, 0);
18871 ops[1] = reg;
18872 break;
18873
18874 case POST_MODIFY:
18875 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18876 gcc_unreachable ();
18877
18878 case REG:
18879 /* We have to use vldm / vstm for too-large modes. */
18880 if (nregs > 1)
18881 {
18882 if (nregs > 4)
18883 templ = "v%smia%%?\t%%m0, %%h1";
18884 else
18885 templ = "v%s1.64\t%%h1, %%A0";
18886
18887 ops[0] = mem;
18888 ops[1] = reg;
18889 break;
18890 }
18891 /* Fall through. */
18892 case LABEL_REF:
18893 case PLUS:
18894 {
18895 int i;
18896 int overlap = -1;
18897 for (i = 0; i < nregs; i++)
18898 {
18899 /* We're only using DImode here because it's a convenient size. */
18900 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18901 ops[1] = adjust_address (mem, DImode, 8 * i);
18902 if (reg_overlap_mentioned_p (ops[0], mem))
18903 {
18904 gcc_assert (overlap == -1);
18905 overlap = i;
18906 }
18907 else
18908 {
18909 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18910 output_asm_insn (buff, ops);
18911 }
18912 }
18913 if (overlap != -1)
18914 {
18915 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18916 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18917 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18918 output_asm_insn (buff, ops);
18919 }
18920
18921 return "";
18922 }
18923
18924 default:
18925 gcc_unreachable ();
18926 }
18927
18928 sprintf (buff, templ, load ? "ld" : "st");
18929 output_asm_insn (buff, ops);
18930
18931 return "";
18932 }
18933
18934 /* Compute and return the length of neon_mov<mode>, where <mode> is
18935 one of VSTRUCT modes: EI, OI, CI or XI. */
18936 int
18937 arm_attr_length_move_neon (rtx_insn *insn)
18938 {
18939 rtx reg, mem, addr;
18940 int load;
18941 machine_mode mode;
18942
18943 extract_insn_cached (insn);
18944
18945 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18946 {
18947 mode = GET_MODE (recog_data.operand[0]);
18948 switch (mode)
18949 {
18950 case E_EImode:
18951 case E_OImode:
18952 return 8;
18953 case E_CImode:
18954 return 12;
18955 case E_XImode:
18956 return 16;
18957 default:
18958 gcc_unreachable ();
18959 }
18960 }
18961
18962 load = REG_P (recog_data.operand[0]);
18963 reg = recog_data.operand[!load];
18964 mem = recog_data.operand[load];
18965
18966 gcc_assert (MEM_P (mem));
18967
18968 addr = XEXP (mem, 0);
18969
18970 /* Strip off const from addresses like (const (plus (...))). */
18971 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18972 addr = XEXP (addr, 0);
18973
18974 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18975 {
18976 int insns = REG_NREGS (reg) / 2;
18977 return insns * 4;
18978 }
18979 else
18980 return 4;
18981 }
18982
18983 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18984 return zero. */
18985
18986 int
18987 arm_address_offset_is_imm (rtx_insn *insn)
18988 {
18989 rtx mem, addr;
18990
18991 extract_insn_cached (insn);
18992
18993 if (REG_P (recog_data.operand[0]))
18994 return 0;
18995
18996 mem = recog_data.operand[0];
18997
18998 gcc_assert (MEM_P (mem));
18999
19000 addr = XEXP (mem, 0);
19001
19002 if (REG_P (addr)
19003 || (GET_CODE (addr) == PLUS
19004 && REG_P (XEXP (addr, 0))
19005 && CONST_INT_P (XEXP (addr, 1))))
19006 return 1;
19007 else
19008 return 0;
19009 }
19010
19011 /* Output an ADD r, s, #n where n may be too big for one instruction.
19012 If adding zero and the destination register is the same as the source, output nothing. */
19013 const char *
19014 output_add_immediate (rtx *operands)
19015 {
19016 HOST_WIDE_INT n = INTVAL (operands[2]);
19017
19018 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19019 {
19020 if (n < 0)
19021 output_multi_immediate (operands,
19022 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19023 -n);
19024 else
19025 output_multi_immediate (operands,
19026 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19027 n);
19028 }
19029
19030 return "";
19031 }
19032
19033 /* Output a multiple immediate operation.
19034 OPERANDS is the vector of operands referred to in the output patterns.
19035 INSTR1 is the output pattern to use for the first constant.
19036 INSTR2 is the output pattern to use for subsequent constants.
19037 IMMED_OP is the index of the constant slot in OPERANDS.
19038 N is the constant value. */
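/* A worked example of the splitting loop below: for N = 0x10004 the first
   chunk emitted is 4 using INSTR1 and the second is 0x10000 using INSTR2,
   so output_add_immediate would produce something like
   "add r0, r1, #4" followed by "add r0, r0, #65536".  */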
19039 static const char *
19040 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19041 int immed_op, HOST_WIDE_INT n)
19042 {
19043 #if HOST_BITS_PER_WIDE_INT > 32
19044 n &= 0xffffffff;
19045 #endif
19046
19047 if (n == 0)
19048 {
19049 /* Quick and easy output. */
19050 operands[immed_op] = const0_rtx;
19051 output_asm_insn (instr1, operands);
19052 }
19053 else
19054 {
19055 int i;
19056 const char * instr = instr1;
19057
19058 /* Note that n is never zero here (which would give no output). */
19059 for (i = 0; i < 32; i += 2)
19060 {
19061 if (n & (3 << i))
19062 {
19063 operands[immed_op] = GEN_INT (n & (255 << i));
19064 output_asm_insn (instr, operands);
19065 instr = instr2;
19066 i += 6;
19067 }
19068 }
19069 }
19070
19071 return "";
19072 }
19073
19074 /* Return the name of a shifter operation. */
19075 static const char *
19076 arm_shift_nmem(enum rtx_code code)
19077 {
19078 switch (code)
19079 {
19080 case ASHIFT:
19081 return ARM_LSL_NAME;
19082
19083 case ASHIFTRT:
19084 return "asr";
19085
19086 case LSHIFTRT:
19087 return "lsr";
19088
19089 case ROTATERT:
19090 return "ror";
19091
19092 default:
19093 abort();
19094 }
19095 }
19096
19097 /* Return the appropriate ARM instruction for the operation code.
19098 The returned result should not be overwritten. OP is the rtx of the
19099 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19100 was shifted. */
19101 const char *
19102 arithmetic_instr (rtx op, int shift_first_arg)
19103 {
19104 switch (GET_CODE (op))
19105 {
19106 case PLUS:
19107 return "add";
19108
19109 case MINUS:
19110 return shift_first_arg ? "rsb" : "sub";
19111
19112 case IOR:
19113 return "orr";
19114
19115 case XOR:
19116 return "eor";
19117
19118 case AND:
19119 return "and";
19120
19121 case ASHIFT:
19122 case ASHIFTRT:
19123 case LSHIFTRT:
19124 case ROTATERT:
19125 return arm_shift_nmem(GET_CODE(op));
19126
19127 default:
19128 gcc_unreachable ();
19129 }
19130 }
19131
19132 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19133 for the operation code. The returned result should not be overwritten.
19134 OP is the rtx of the shift.
19135 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19136 constant shift amount otherwise. */
19137 static const char *
19138 shift_op (rtx op, HOST_WIDE_INT *amountp)
19139 {
19140 const char * mnem;
19141 enum rtx_code code = GET_CODE (op);
19142
19143 switch (code)
19144 {
19145 case ROTATE:
19146 if (!CONST_INT_P (XEXP (op, 1)))
19147 {
19148 output_operand_lossage ("invalid shift operand");
19149 return NULL;
19150 }
19151
19152 code = ROTATERT;
19153 *amountp = 32 - INTVAL (XEXP (op, 1));
19154 mnem = "ror";
19155 break;
19156
19157 case ASHIFT:
19158 case ASHIFTRT:
19159 case LSHIFTRT:
19160 case ROTATERT:
19161 mnem = arm_shift_nmem(code);
19162 if (CONST_INT_P (XEXP (op, 1)))
19163 {
19164 *amountp = INTVAL (XEXP (op, 1));
19165 }
19166 else if (REG_P (XEXP (op, 1)))
19167 {
19168 *amountp = -1;
19169 return mnem;
19170 }
19171 else
19172 {
19173 output_operand_lossage ("invalid shift operand");
19174 return NULL;
19175 }
19176 break;
19177
19178 case MULT:
19179 /* We never have to worry about the amount being other than a
19180 power of 2, since this case can never be reloaded from a reg. */
19181 if (!CONST_INT_P (XEXP (op, 1)))
19182 {
19183 output_operand_lossage ("invalid shift operand");
19184 return NULL;
19185 }
19186
19187 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19188
19189 /* Amount must be a power of two. */
19190 if (*amountp & (*amountp - 1))
19191 {
19192 output_operand_lossage ("invalid shift operand");
19193 return NULL;
19194 }
19195
19196 *amountp = exact_log2 (*amountp);
19197 gcc_assert (IN_RANGE (*amountp, 0, 31));
19198 return ARM_LSL_NAME;
19199
19200 default:
19201 output_operand_lossage ("invalid shift operand");
19202 return NULL;
19203 }
19204
19205 /* This is not 100% correct, but follows from the desire to merge
19206 multiplication by a power of 2 with the recognizer for a
19207 shift. >=32 is not a valid shift for "lsl", so we must try and
19208 output a shift that produces the correct arithmetical result.
19209 Using lsr #32 is identical except for the fact that the carry bit
19210 is not set correctly if we set the flags; but we never use the
19211 carry bit from such an operation, so we can ignore that. */
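/* For example, should (ashift x 40) reach this point, it is output as
   "lsr" with an amount of 32, which yields zero, matching the arithmetic
   result of shifting a 32-bit value left by 40.  */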
19212 if (code == ROTATERT)
19213 /* Rotate is just modulo 32. */
19214 *amountp &= 31;
19215 else if (*amountp != (*amountp & 31))
19216 {
19217 if (code == ASHIFT)
19218 mnem = "lsr";
19219 *amountp = 32;
19220 }
19221
19222 /* Shifts of 0 are no-ops. */
19223 if (*amountp == 0)
19224 return NULL;
19225
19226 return mnem;
19227 }
19228
19229 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19230 because /bin/as is horribly restrictive. The judgement about
19231 whether or not each character is 'printable' (and can be output as
19232 is) or not (and must be printed with an octal escape) must be made
19233 with reference to the *host* character set -- the situation is
19234 similar to that discussed in the comments above pp_c_char in
19235 c-pretty-print.c. */
19236
19237 #define MAX_ASCII_LEN 51
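/* Illustrative example of the escaping below: the bytes 'a', '"', 0x07 are
   emitted as .ascii "a\"\007", and a fresh .ascii directive is started once
   roughly MAX_ASCII_LEN characters have been written.  */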
19238
19239 void
19240 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19241 {
19242 int i;
19243 int len_so_far = 0;
19244
19245 fputs ("\t.ascii\t\"", stream);
19246
19247 for (i = 0; i < len; i++)
19248 {
19249 int c = p[i];
19250
19251 if (len_so_far >= MAX_ASCII_LEN)
19252 {
19253 fputs ("\"\n\t.ascii\t\"", stream);
19254 len_so_far = 0;
19255 }
19256
19257 if (ISPRINT (c))
19258 {
19259 if (c == '\\' || c == '\"')
19260 {
19261 putc ('\\', stream);
19262 len_so_far++;
19263 }
19264 putc (c, stream);
19265 len_so_far++;
19266 }
19267 else
19268 {
19269 fprintf (stream, "\\%03o", c);
19270 len_so_far += 4;
19271 }
19272 }
19273
19274 fputs ("\"\n", stream);
19275 }
19276 \f
19277 /* Whether a register is callee saved or not. This is necessary because, on
19278 Thumb-1 targets, high registers are marked as caller saved when optimizing
19279 for size (to discourage their use) even though they are really callee saved. */
19280 #define callee_saved_reg_p(reg) \
19281 (!call_used_regs[reg] \
19282 || (TARGET_THUMB1 && optimize_size \
19283 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19284
19285 /* Compute the register save mask for registers 0 through 12
19286 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19287
19288 static unsigned long
19289 arm_compute_save_reg0_reg12_mask (void)
19290 {
19291 unsigned long func_type = arm_current_func_type ();
19292 unsigned long save_reg_mask = 0;
19293 unsigned int reg;
19294
19295 if (IS_INTERRUPT (func_type))
19296 {
19297 unsigned int max_reg;
19298 /* Interrupt functions must not corrupt any registers,
19299 even call clobbered ones. If this is a leaf function
19300 we can just examine the registers used by the RTL, but
19301 otherwise we have to assume that whatever function is
19302 called might clobber anything, and so we have to save
19303 all the call-clobbered registers as well. */
19304 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19305 /* FIQ handlers have registers r8 - r12 banked, so
19306 we only need to check r0 - r7. Normal ISRs only
19307 bank r14 and r15, so we must check up to r12.
19308 r13 is the stack pointer which is always preserved,
19309 so we do not need to consider it here. */
19310 max_reg = 7;
19311 else
19312 max_reg = 12;
19313
19314 for (reg = 0; reg <= max_reg; reg++)
19315 if (df_regs_ever_live_p (reg)
19316 || (! crtl->is_leaf && call_used_regs[reg]))
19317 save_reg_mask |= (1 << reg);
19318
19319 /* Also save the pic base register if necessary. */
19320 if (flag_pic
19321 && !TARGET_SINGLE_PIC_BASE
19322 && arm_pic_register != INVALID_REGNUM
19323 && crtl->uses_pic_offset_table)
19324 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19325 }
19326 else if (IS_VOLATILE(func_type))
19327 {
19328 /* For noreturn functions we historically omitted register saves
19329 altogether. However this really messes up debugging. As a
19330 compromise save just the frame pointers. Combined with the link
19331 register saved elsewhere this should be sufficient to get
19332 a backtrace. */
19333 if (frame_pointer_needed)
19334 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19335 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19336 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19337 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19338 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19339 }
19340 else
19341 {
19342 /* In the normal case we only need to save those registers
19343 which are call saved and which are used by this function. */
19344 for (reg = 0; reg <= 11; reg++)
19345 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19346 save_reg_mask |= (1 << reg);
19347
19348 /* Handle the frame pointer as a special case. */
19349 if (frame_pointer_needed)
19350 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19351
19352 /* If we aren't loading the PIC register,
19353 don't stack it even though it may be live. */
19354 if (flag_pic
19355 && !TARGET_SINGLE_PIC_BASE
19356 && arm_pic_register != INVALID_REGNUM
19357 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19358 || crtl->uses_pic_offset_table))
19359 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19360
19361 /* The prologue will copy SP into R0, so save it. */
19362 if (IS_STACKALIGN (func_type))
19363 save_reg_mask |= 1;
19364 }
19365
19366 /* Save registers so the exception handler can modify them. */
19367 if (crtl->calls_eh_return)
19368 {
19369 unsigned int i;
19370
19371 for (i = 0; ; i++)
19372 {
19373 reg = EH_RETURN_DATA_REGNO (i);
19374 if (reg == INVALID_REGNUM)
19375 break;
19376 save_reg_mask |= 1 << reg;
19377 }
19378 }
19379
19380 return save_reg_mask;
19381 }
19382
19383 /* Return true if r3 is live at the start of the function. */
19384
19385 static bool
19386 arm_r3_live_at_start_p (void)
19387 {
19388 /* Just look at cfg info, which is still close enough to correct at this
19389 point. This gives false positives for broken functions that might use
19390 uninitialized data that happens to be allocated in r3, but who cares? */
19391 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19392 }
19393
19394 /* Compute the number of bytes used to store the static chain register on the
19395 stack, above the stack frame. We need to know this accurately to get the
19396 alignment of the rest of the stack frame correct. */
19397
19398 static int
19399 arm_compute_static_chain_stack_bytes (void)
19400 {
19401 /* See the defining assertion in arm_expand_prologue. */
19402 if (IS_NESTED (arm_current_func_type ())
19403 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19404 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19405 || flag_stack_clash_protection)
19406 && !df_regs_ever_live_p (LR_REGNUM)))
19407 && arm_r3_live_at_start_p ()
19408 && crtl->args.pretend_args_size == 0)
19409 return 4;
19410
19411 return 0;
19412 }
19413
19414 /* Compute a bit mask of which core registers need to be
19415 saved on the stack for the current function.
19416 This is used by arm_compute_frame_layout, which may add extra registers. */
19417
19418 static unsigned long
19419 arm_compute_save_core_reg_mask (void)
19420 {
19421 unsigned int save_reg_mask = 0;
19422 unsigned long func_type = arm_current_func_type ();
19423 unsigned int reg;
19424
19425 if (IS_NAKED (func_type))
19426 /* This should never really happen. */
19427 return 0;
19428
19429 /* If we are creating a stack frame, then we must save the frame pointer,
19430 IP (which will hold the old stack pointer), LR and the PC. */
19431 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19432 save_reg_mask |=
19433 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19434 | (1 << IP_REGNUM)
19435 | (1 << LR_REGNUM)
19436 | (1 << PC_REGNUM);
19437
19438 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19439
19440 /* Decide if we need to save the link register.
19441 Interrupt routines have their own banked link register,
19442 so they never need to save it.
19443 Otherwise if we do not use the link register we do not need to save
19444 it. If we are pushing other registers onto the stack however, we
19445 can save an instruction in the epilogue by pushing the link register
19446 now and then popping it back into the PC. This incurs extra memory
19447 accesses though, so we only do it when optimizing for size, and only
19448 if we know that we will not need a fancy return sequence. */
19449 if (df_regs_ever_live_p (LR_REGNUM)
19450 || (save_reg_mask
19451 && optimize_size
19452 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19453 && !crtl->tail_call_emit
19454 && !crtl->calls_eh_return))
19455 save_reg_mask |= 1 << LR_REGNUM;
19456
19457 if (cfun->machine->lr_save_eliminated)
19458 save_reg_mask &= ~ (1 << LR_REGNUM);
19459
19460 if (TARGET_REALLY_IWMMXT
19461 && ((bit_count (save_reg_mask)
19462 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19463 arm_compute_static_chain_stack_bytes())
19464 ) % 2) != 0)
19465 {
19466 /* The total number of registers that are going to be pushed
19467 onto the stack is odd. We need to ensure that the stack
19468 is 64-bit aligned before we start to save iWMMXt registers,
19469 and also before we start to create locals. (A local variable
19470 might be a double or long long which we will load/store using
19471 an iWMMXt instruction). Therefore we need to push another
19472 ARM register, so that the stack will be 64-bit aligned. We
19473 try to avoid using the arg registers (r0 - r3) as they might be
19474 used to pass values in a tail call. */
19475 for (reg = 4; reg <= 12; reg++)
19476 if ((save_reg_mask & (1 << reg)) == 0)
19477 break;
19478
19479 if (reg <= 12)
19480 save_reg_mask |= (1 << reg);
19481 else
19482 {
19483 cfun->machine->sibcall_blocked = 1;
19484 save_reg_mask |= (1 << 3);
19485 }
19486 }
19487
19488 /* We may need to push an additional register for use initializing the
19489 PIC base register. */
19490 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19491 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19492 {
19493 reg = thumb_find_work_register (1 << 4);
19494 if (!call_used_regs[reg])
19495 save_reg_mask |= (1 << reg);
19496 }
19497
19498 return save_reg_mask;
19499 }
19500
19501 /* Compute a bit mask of which core registers need to be
19502 saved on the stack for the current function. */
19503 static unsigned long
19504 thumb1_compute_save_core_reg_mask (void)
19505 {
19506 unsigned long mask;
19507 unsigned reg;
19508
19509 mask = 0;
19510 for (reg = 0; reg < 12; reg ++)
19511 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19512 mask |= 1 << reg;
19513
19514 /* Handle the frame pointer as a special case. */
19515 if (frame_pointer_needed)
19516 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19517
19518 if (flag_pic
19519 && !TARGET_SINGLE_PIC_BASE
19520 && arm_pic_register != INVALID_REGNUM
19521 && crtl->uses_pic_offset_table)
19522 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19523
19524 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19525 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19526 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19527
19528 /* LR will also be pushed if any lo regs are pushed. */
19529 if (mask & 0xff || thumb_force_lr_save ())
19530 mask |= (1 << LR_REGNUM);
19531
19532 /* Make sure we have a low work register if we need one.
19533 We will need one if we are going to push a high register,
19534 but we are not currently intending to push a low register. */
19535 if ((mask & 0xff) == 0
19536 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19537 {
19538 /* Use thumb_find_work_register to choose which register
19539 we will use. If the register is live then we will
19540 have to push it. Use LAST_LO_REGNUM as our fallback
19541 choice for the register to select. */
19542 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19543 /* Make sure the register returned by thumb_find_work_register is
19544 not part of the return value. */
19545 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19546 reg = LAST_LO_REGNUM;
19547
19548 if (callee_saved_reg_p (reg))
19549 mask |= 1 << reg;
19550 }
19551
19552 /* The 504 below is 8 bytes less than 512 because there are two possible
19553 alignment words. We can't tell here if they will be present or not so we
19554 have to play it safe and assume that they are. */
19555 if ((CALLER_INTERWORKING_SLOT_SIZE +
19556 ROUND_UP_WORD (get_frame_size ()) +
19557 crtl->outgoing_args_size) >= 504)
19558 {
19559 /* This is the same as the code in thumb1_expand_prologue() which
19560 determines which register to use for stack decrement. */
19561 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19562 if (mask & (1 << reg))
19563 break;
19564
19565 if (reg > LAST_LO_REGNUM)
19566 {
19567 /* Make sure we have a register available for stack decrement. */
19568 mask |= 1 << LAST_LO_REGNUM;
19569 }
19570 }
19571
19572 return mask;
19573 }
19574
19575
19576 /* Return the number of bytes required to save VFP registers. */
19577 static int
19578 arm_get_vfp_saved_size (void)
19579 {
19580 unsigned int regno;
19581 int count;
19582 int saved;
19583
19584 saved = 0;
19585 /* Space for saved VFP registers. */
19586 if (TARGET_HARD_FLOAT)
19587 {
19588 count = 0;
19589 for (regno = FIRST_VFP_REGNUM;
19590 regno < LAST_VFP_REGNUM;
19591 regno += 2)
19592 {
19593 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19594 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19595 {
19596 if (count > 0)
19597 {
19598 /* Workaround ARM10 VFPr1 bug. */
19599 if (count == 2 && !arm_arch6)
19600 count++;
19601 saved += count * 8;
19602 }
19603 count = 0;
19604 }
19605 else
19606 count++;
19607 }
19608 if (count > 0)
19609 {
19610 if (count == 2 && !arm_arch6)
19611 count++;
19612 saved += count * 8;
19613 }
19614 }
19615 return saved;
19616 }
19617
19618
19619 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19620 everything bar the final return instruction. If SIMPLE_RETURN is true,
19621 then do not output the epilogue, because it has already been emitted in RTL.
19622
19623 Note: do not forget to update length attribute of corresponding insn pattern
19624 when changing assembly output (eg. length attribute of
19625 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19626 register clearing sequences). */
19627 const char *
19628 output_return_instruction (rtx operand, bool really_return, bool reverse,
19629 bool simple_return)
19630 {
19631 char conditional[10];
19632 char instr[100];
19633 unsigned reg;
19634 unsigned long live_regs_mask;
19635 unsigned long func_type;
19636 arm_stack_offsets *offsets;
19637
19638 func_type = arm_current_func_type ();
19639
19640 if (IS_NAKED (func_type))
19641 return "";
19642
19643 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19644 {
19645 /* If this function was declared non-returning, and we have
19646 found a tail call, then we have to trust that the called
19647 function won't return. */
19648 if (really_return)
19649 {
19650 rtx ops[2];
19651
19652 /* Otherwise, trap an attempted return by aborting. */
19653 ops[0] = operand;
19654 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19655 : "abort");
19656 assemble_external_libcall (ops[1]);
19657 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19658 }
19659
19660 return "";
19661 }
19662
19663 gcc_assert (!cfun->calls_alloca || really_return);
19664
19665 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19666
19667 cfun->machine->return_used_this_function = 1;
19668
19669 offsets = arm_get_frame_offsets ();
19670 live_regs_mask = offsets->saved_regs_mask;
19671
19672 if (!simple_return && live_regs_mask)
19673 {
19674 const char * return_reg;
19675
19676 /* If we do not have any special requirements for function exit
19677 (e.g. interworking) then we can load the return address
19678 directly into the PC. Otherwise we must load it into LR. */
19679 if (really_return
19680 && !IS_CMSE_ENTRY (func_type)
19681 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19682 return_reg = reg_names[PC_REGNUM];
19683 else
19684 return_reg = reg_names[LR_REGNUM];
19685
19686 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19687 {
19688 /* There are three possible reasons for the IP register
19689 being saved. 1) a stack frame was created, in which case
19690 IP contains the old stack pointer, or 2) an ISR routine
19691 corrupted it, or 3) it was saved to align the stack on
19692 iWMMXt. In case 1, restore IP into SP, otherwise just
19693 restore IP. */
19694 if (frame_pointer_needed)
19695 {
19696 live_regs_mask &= ~ (1 << IP_REGNUM);
19697 live_regs_mask |= (1 << SP_REGNUM);
19698 }
19699 else
19700 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19701 }
19702
19703 /* On some ARM architectures it is faster to use LDR rather than
19704 LDM to load a single register. On other architectures, the
19705 cost is the same. In 26 bit mode, or for exception handlers,
19706 we have to use LDM to load the PC so that the CPSR is also
19707 restored. */
19708 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19709 if (live_regs_mask == (1U << reg))
19710 break;
19711
19712 if (reg <= LAST_ARM_REGNUM
19713 && (reg != LR_REGNUM
19714 || ! really_return
19715 || ! IS_INTERRUPT (func_type)))
19716 {
19717 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19718 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19719 }
19720 else
19721 {
19722 char *p;
19723 int first = 1;
19724
19725 /* Generate the load multiple instruction to restore the
19726 registers. Note we can get here, even if
19727 frame_pointer_needed is true, but only if sp already
19728 points to the base of the saved core registers. */
19729 if (live_regs_mask & (1 << SP_REGNUM))
19730 {
19731 unsigned HOST_WIDE_INT stack_adjust;
19732
19733 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19734 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19735
19736 if (stack_adjust && arm_arch5 && TARGET_ARM)
19737 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19738 else
19739 {
19740 /* If we can't use ldmib (SA110 bug),
19741 then try to pop r3 instead. */
19742 if (stack_adjust)
19743 live_regs_mask |= 1 << 3;
19744
19745 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19746 }
19747 }
19748 /* For interrupt returns we have to use an LDM rather than
19749 a POP so that we can use the exception return variant. */
19750 else if (IS_INTERRUPT (func_type))
19751 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19752 else
19753 sprintf (instr, "pop%s\t{", conditional);
19754
19755 p = instr + strlen (instr);
19756
19757 for (reg = 0; reg <= SP_REGNUM; reg++)
19758 if (live_regs_mask & (1 << reg))
19759 {
19760 int l = strlen (reg_names[reg]);
19761
19762 if (first)
19763 first = 0;
19764 else
19765 {
19766 memcpy (p, ", ", 2);
19767 p += 2;
19768 }
19769
19770 memcpy (p, "%|", 2);
19771 memcpy (p + 2, reg_names[reg], l);
19772 p += l + 2;
19773 }
19774
19775 if (live_regs_mask & (1 << LR_REGNUM))
19776 {
19777 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19778 /* If returning from an interrupt, restore the CPSR. */
19779 if (IS_INTERRUPT (func_type))
19780 strcat (p, "^");
19781 }
19782 else
19783 strcpy (p, "}");
19784 }
19785
19786 output_asm_insn (instr, & operand);
19787
19788 /* See if we need to generate an extra instruction to
19789 perform the actual function return. */
19790 if (really_return
19791 && func_type != ARM_FT_INTERWORKED
19792 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19793 {
19794 /* The return has already been handled
19795 by loading the LR into the PC. */
19796 return "";
19797 }
19798 }
19799
19800 if (really_return)
19801 {
19802 switch ((int) ARM_FUNC_TYPE (func_type))
19803 {
19804 case ARM_FT_ISR:
19805 case ARM_FT_FIQ:
19806 /* ??? This is wrong for unified assembly syntax. */
19807 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19808 break;
19809
19810 case ARM_FT_INTERWORKED:
19811 gcc_assert (arm_arch5 || arm_arch4t);
19812 sprintf (instr, "bx%s\t%%|lr", conditional);
19813 break;
19814
19815 case ARM_FT_EXCEPTION:
19816 /* ??? This is wrong for unified assembly syntax. */
19817 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19818 break;
19819
19820 default:
19821 if (IS_CMSE_ENTRY (func_type))
19822 {
 19823 /* Check if we have to clear the 'GE bits', which are only used if
 19824 the parallel add and subtract instructions are available. */
19825 if (TARGET_INT_SIMD)
19826 snprintf (instr, sizeof (instr),
19827 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19828 else
19829 snprintf (instr, sizeof (instr),
19830 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19831
19832 output_asm_insn (instr, & operand);
19833 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19834 {
19835 /* Clear the cumulative exception-status bits (0-4,7) and the
19836 condition code bits (28-31) of the FPSCR. We need to
19837 remember to clear the first scratch register used (IP) and
19838 save and restore the second (r4). */
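	      /* For reference (illustrative note): the movw/movt pair below
		 builds 0x0FFFFF60 in r4 (65376 == 0xFF60, 4095 == 0x0FFF), so
		 the AND keeps every FPSCR bit except 0-4, 7 and 28-31.  */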
19839 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19840 output_asm_insn (instr, & operand);
19841 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19842 output_asm_insn (instr, & operand);
19843 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19844 output_asm_insn (instr, & operand);
19845 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19846 output_asm_insn (instr, & operand);
19847 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19848 output_asm_insn (instr, & operand);
19849 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19850 output_asm_insn (instr, & operand);
19851 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19852 output_asm_insn (instr, & operand);
19853 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19854 output_asm_insn (instr, & operand);
19855 }
19856 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19857 }
19858 /* Use bx if it's available. */
19859 else if (arm_arch5 || arm_arch4t)
19860 sprintf (instr, "bx%s\t%%|lr", conditional);
19861 else
19862 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19863 break;
19864 }
19865
19866 output_asm_insn (instr, & operand);
19867 }
19868
19869 return "";
19870 }
19871
19872 /* Output in FILE asm statements needed to declare the NAME of the function
19873 defined by its DECL node. */
19874
19875 void
19876 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19877 {
19878 size_t cmse_name_len;
19879 char *cmse_name = 0;
19880 char cmse_prefix[] = "__acle_se_";
19881
19882 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19883 extra function label for each function with the 'cmse_nonsecure_entry'
19884 attribute. This extra function label should be prepended with
19885 '__acle_se_', telling the linker that it needs to create secure gateway
19886 veneers for this function. */
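  /* For a function foo carrying the attribute, the output is roughly
     (illustrative sketch; exact directives depend on the target macros):

	.globl	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
     foo:
     __acle_se_foo:  */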
19887 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19888 DECL_ATTRIBUTES (decl)))
19889 {
19890 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19891 cmse_name = XALLOCAVEC (char, cmse_name_len);
19892 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19893 targetm.asm_out.globalize_label (file, cmse_name);
19894
19895 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19896 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19897 }
19898
19899 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19900 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19901 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19902 ASM_OUTPUT_LABEL (file, name);
19903
19904 if (cmse_name)
19905 ASM_OUTPUT_LABEL (file, cmse_name);
19906
19907 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19908 }
19909
19910 /* Write the function name into the code section, directly preceding
19911 the function prologue.
19912
19913 Code will be output similar to this:
19914 t0
19915 .ascii "arm_poke_function_name", 0
19916 .align
19917 t1
19918 .word 0xff000000 + (t1 - t0)
19919 arm_poke_function_name
19920 mov ip, sp
19921 stmfd sp!, {fp, ip, lr, pc}
19922 sub fp, ip, #4
19923
19924 When performing a stack backtrace, code can inspect the value
19925 of 'pc' stored at 'fp' + 0. If the trace function then looks
19926 at location pc - 12 and the top 8 bits are set, then we know
19927 that there is a function name embedded immediately preceding this
 19928 location, and that it has length ((pc[-3]) & 0xff000000).
19929
19930 We assume that pc is declared as a pointer to an unsigned long.
19931
19932 It is of no benefit to output the function name if we are assembling
19933 a leaf function. These function types will not contain a stack
19934 backtrace structure, therefore it is not possible to determine the
19935 function name. */
19936 void
19937 arm_poke_function_name (FILE *stream, const char *name)
19938 {
19939 unsigned long alignlength;
19940 unsigned long length;
19941 rtx x;
19942
19943 length = strlen (name) + 1;
19944 alignlength = ROUND_UP_WORD (length);
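  /* Illustrative example: for NAME "foo", LENGTH is 4 (three characters plus
     the terminating NUL), ALIGNLENGTH rounds up to 4, and the marker word
     emitted below is 0xff000004.  */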
19945
19946 ASM_OUTPUT_ASCII (stream, name, length);
19947 ASM_OUTPUT_ALIGN (stream, 2);
19948 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19949 assemble_aligned_integer (UNITS_PER_WORD, x);
19950 }
19951
19952 /* Place some comments into the assembler stream
19953 describing the current function. */
19954 static void
19955 arm_output_function_prologue (FILE *f)
19956 {
19957 unsigned long func_type;
19958
19959 /* Sanity check. */
19960 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19961
19962 func_type = arm_current_func_type ();
19963
19964 switch ((int) ARM_FUNC_TYPE (func_type))
19965 {
19966 default:
19967 case ARM_FT_NORMAL:
19968 break;
19969 case ARM_FT_INTERWORKED:
19970 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19971 break;
19972 case ARM_FT_ISR:
19973 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19974 break;
19975 case ARM_FT_FIQ:
19976 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19977 break;
19978 case ARM_FT_EXCEPTION:
19979 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19980 break;
19981 }
19982
19983 if (IS_NAKED (func_type))
19984 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19985
19986 if (IS_VOLATILE (func_type))
19987 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19988
19989 if (IS_NESTED (func_type))
19990 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19991 if (IS_STACKALIGN (func_type))
19992 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19993 if (IS_CMSE_ENTRY (func_type))
19994 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19995
19996 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19997 crtl->args.size,
19998 crtl->args.pretend_args_size,
19999 (HOST_WIDE_INT) get_frame_size ());
20000
20001 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20002 frame_pointer_needed,
20003 cfun->machine->uses_anonymous_args);
20004
20005 if (cfun->machine->lr_save_eliminated)
20006 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20007
20008 if (crtl->calls_eh_return)
20009 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20010
20011 }
20012
20013 static void
20014 arm_output_function_epilogue (FILE *)
20015 {
20016 arm_stack_offsets *offsets;
20017
20018 if (TARGET_THUMB1)
20019 {
20020 int regno;
20021
20022 /* Emit any call-via-reg trampolines that are needed for v4t support
20023 of call_reg and call_value_reg type insns. */
20024 for (regno = 0; regno < LR_REGNUM; regno++)
20025 {
20026 rtx label = cfun->machine->call_via[regno];
20027
20028 if (label != NULL)
20029 {
20030 switch_to_section (function_section (current_function_decl));
20031 targetm.asm_out.internal_label (asm_out_file, "L",
20032 CODE_LABEL_NUMBER (label));
20033 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20034 }
20035 }
20036
20037 /* ??? Probably not safe to set this here, since it assumes that a
20038 function will be emitted as assembly immediately after we generate
20039 RTL for it. This does not happen for inline functions. */
20040 cfun->machine->return_used_this_function = 0;
20041 }
20042 else /* TARGET_32BIT */
20043 {
20044 /* We need to take into account any stack-frame rounding. */
20045 offsets = arm_get_frame_offsets ();
20046
20047 gcc_assert (!use_return_insn (FALSE, NULL)
20048 || (cfun->machine->return_used_this_function != 0)
20049 || offsets->saved_regs == offsets->outgoing_args
20050 || frame_pointer_needed);
20051 }
20052 }
20053
20054 /* Generate and emit a sequence of insns equivalent to PUSH, but using
 20055 STR and STRD. If an even number of registers is being pushed, an
 20056 STRD pattern is created for each register pair. If an
 20057 odd number of registers is pushed, emit an initial STR followed by
20058 as many STRD instructions as are needed. This works best when the
20059 stack is initially 64-bit aligned (the normal case), since it
20060 ensures that each STRD is also 64-bit aligned. */
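/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r6} the
   expected sequence is roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the initial STR allocates the whole 12-byte block with writeback and
   the remaining pair is stored at a doubleword-aligned offset.  */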
20061 static void
20062 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20063 {
20064 int num_regs = 0;
20065 int i;
20066 int regno;
20067 rtx par = NULL_RTX;
20068 rtx dwarf = NULL_RTX;
20069 rtx tmp;
20070 bool first = true;
20071
20072 num_regs = bit_count (saved_regs_mask);
20073
20074 /* Must be at least one register to save, and can't save SP or PC. */
20075 gcc_assert (num_regs > 0 && num_regs <= 14);
20076 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20077 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20078
20079 /* Create sequence for DWARF info. All the frame-related data for
20080 debugging is held in this wrapper. */
20081 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20082
20083 /* Describe the stack adjustment. */
20084 tmp = gen_rtx_SET (stack_pointer_rtx,
20085 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20086 RTX_FRAME_RELATED_P (tmp) = 1;
20087 XVECEXP (dwarf, 0, 0) = tmp;
20088
20089 /* Find the first register. */
20090 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20091 ;
20092
20093 i = 0;
20094
 20095 /* If there's an odd number of registers to push, start off by
20096 pushing a single register. This ensures that subsequent strd
20097 operations are dword aligned (assuming that SP was originally
20098 64-bit aligned). */
20099 if ((num_regs & 1) != 0)
20100 {
20101 rtx reg, mem, insn;
20102
20103 reg = gen_rtx_REG (SImode, regno);
20104 if (num_regs == 1)
20105 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20106 stack_pointer_rtx));
20107 else
20108 mem = gen_frame_mem (Pmode,
20109 gen_rtx_PRE_MODIFY
20110 (Pmode, stack_pointer_rtx,
20111 plus_constant (Pmode, stack_pointer_rtx,
20112 -4 * num_regs)));
20113
20114 tmp = gen_rtx_SET (mem, reg);
20115 RTX_FRAME_RELATED_P (tmp) = 1;
20116 insn = emit_insn (tmp);
20117 RTX_FRAME_RELATED_P (insn) = 1;
20118 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20119 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20120 RTX_FRAME_RELATED_P (tmp) = 1;
20121 i++;
20122 regno++;
20123 XVECEXP (dwarf, 0, i) = tmp;
20124 first = false;
20125 }
20126
20127 while (i < num_regs)
20128 if (saved_regs_mask & (1 << regno))
20129 {
20130 rtx reg1, reg2, mem1, mem2;
20131 rtx tmp0, tmp1, tmp2;
20132 int regno2;
20133
20134 /* Find the register to pair with this one. */
20135 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20136 regno2++)
20137 ;
20138
20139 reg1 = gen_rtx_REG (SImode, regno);
20140 reg2 = gen_rtx_REG (SImode, regno2);
20141
20142 if (first)
20143 {
20144 rtx insn;
20145
20146 first = false;
20147 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20148 stack_pointer_rtx,
20149 -4 * num_regs));
20150 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20151 stack_pointer_rtx,
20152 -4 * (num_regs - 1)));
20153 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20154 plus_constant (Pmode, stack_pointer_rtx,
20155 -4 * (num_regs)));
20156 tmp1 = gen_rtx_SET (mem1, reg1);
20157 tmp2 = gen_rtx_SET (mem2, reg2);
20158 RTX_FRAME_RELATED_P (tmp0) = 1;
20159 RTX_FRAME_RELATED_P (tmp1) = 1;
20160 RTX_FRAME_RELATED_P (tmp2) = 1;
20161 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20162 XVECEXP (par, 0, 0) = tmp0;
20163 XVECEXP (par, 0, 1) = tmp1;
20164 XVECEXP (par, 0, 2) = tmp2;
20165 insn = emit_insn (par);
20166 RTX_FRAME_RELATED_P (insn) = 1;
20167 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20168 }
20169 else
20170 {
20171 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20172 stack_pointer_rtx,
20173 4 * i));
20174 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20175 stack_pointer_rtx,
20176 4 * (i + 1)));
20177 tmp1 = gen_rtx_SET (mem1, reg1);
20178 tmp2 = gen_rtx_SET (mem2, reg2);
20179 RTX_FRAME_RELATED_P (tmp1) = 1;
20180 RTX_FRAME_RELATED_P (tmp2) = 1;
20181 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20182 XVECEXP (par, 0, 0) = tmp1;
20183 XVECEXP (par, 0, 1) = tmp2;
20184 emit_insn (par);
20185 }
20186
20187 /* Create unwind information. This is an approximation. */
20188 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20189 plus_constant (Pmode,
20190 stack_pointer_rtx,
20191 4 * i)),
20192 reg1);
20193 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20194 plus_constant (Pmode,
20195 stack_pointer_rtx,
20196 4 * (i + 1))),
20197 reg2);
20198
20199 RTX_FRAME_RELATED_P (tmp1) = 1;
20200 RTX_FRAME_RELATED_P (tmp2) = 1;
20201 XVECEXP (dwarf, 0, i + 1) = tmp1;
20202 XVECEXP (dwarf, 0, i + 2) = tmp2;
20203 i += 2;
20204 regno = regno2 + 1;
20205 }
20206 else
20207 regno++;
20208
20209 return;
20210 }
20211
20212 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20213 whenever possible, otherwise it emits single-word stores. The first store
 20214 also allocates stack space for all saved registers, using pre-indexed
 20215 addressing with writeback. All other stores use offset addressing. If no STRD
20216 can be emitted, this function emits a sequence of single-word stores,
 20217 and not an STM as before, because single-word stores give the scheduler more
 20218 freedom and can be turned into an STM by peephole optimizations. */
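/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r7} the
   expected sequence is roughly

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   i.e. the first store allocates the whole block with writeback and the
   unpaired register is stored with plain offset addressing.  */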
20219 static void
20220 arm_emit_strd_push (unsigned long saved_regs_mask)
20221 {
20222 int num_regs = 0;
20223 int i, j, dwarf_index = 0;
20224 int offset = 0;
20225 rtx dwarf = NULL_RTX;
20226 rtx insn = NULL_RTX;
20227 rtx tmp, mem;
20228
 20229 /* TODO: More efficient code could be emitted by changing the
20230 layout, e.g., first push all pairs that can use STRD to keep the
20231 stack aligned, and then push all other registers. */
20232 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20233 if (saved_regs_mask & (1 << i))
20234 num_regs++;
20235
20236 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20237 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20238 gcc_assert (num_regs > 0);
20239
20240 /* Create sequence for DWARF info. */
20241 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20242
20243 /* For dwarf info, we generate explicit stack update. */
20244 tmp = gen_rtx_SET (stack_pointer_rtx,
20245 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20246 RTX_FRAME_RELATED_P (tmp) = 1;
20247 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20248
20249 /* Save registers. */
20250 offset = - 4 * num_regs;
20251 j = 0;
20252 while (j <= LAST_ARM_REGNUM)
20253 if (saved_regs_mask & (1 << j))
20254 {
20255 if ((j % 2 == 0)
20256 && (saved_regs_mask & (1 << (j + 1))))
20257 {
 20258 /* The current register and the next register form a register pair
 20259 for which STRD can be generated. */
20260 if (offset < 0)
20261 {
20262 /* Allocate stack space for all saved registers. */
20263 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20264 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20265 mem = gen_frame_mem (DImode, tmp);
20266 offset = 0;
20267 }
20268 else if (offset > 0)
20269 mem = gen_frame_mem (DImode,
20270 plus_constant (Pmode,
20271 stack_pointer_rtx,
20272 offset));
20273 else
20274 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20275
20276 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20277 RTX_FRAME_RELATED_P (tmp) = 1;
20278 tmp = emit_insn (tmp);
20279
20280 /* Record the first store insn. */
20281 if (dwarf_index == 1)
20282 insn = tmp;
20283
20284 /* Generate dwarf info. */
20285 mem = gen_frame_mem (SImode,
20286 plus_constant (Pmode,
20287 stack_pointer_rtx,
20288 offset));
20289 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20290 RTX_FRAME_RELATED_P (tmp) = 1;
20291 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20292
20293 mem = gen_frame_mem (SImode,
20294 plus_constant (Pmode,
20295 stack_pointer_rtx,
20296 offset + 4));
20297 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20298 RTX_FRAME_RELATED_P (tmp) = 1;
20299 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20300
20301 offset += 8;
20302 j += 2;
20303 }
20304 else
20305 {
20306 /* Emit a single word store. */
20307 if (offset < 0)
20308 {
20309 /* Allocate stack space for all saved registers. */
20310 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20311 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20312 mem = gen_frame_mem (SImode, tmp);
20313 offset = 0;
20314 }
20315 else if (offset > 0)
20316 mem = gen_frame_mem (SImode,
20317 plus_constant (Pmode,
20318 stack_pointer_rtx,
20319 offset));
20320 else
20321 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20322
20323 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20324 RTX_FRAME_RELATED_P (tmp) = 1;
20325 tmp = emit_insn (tmp);
20326
20327 /* Record the first store insn. */
20328 if (dwarf_index == 1)
20329 insn = tmp;
20330
20331 /* Generate dwarf info. */
20332 mem = gen_frame_mem (SImode,
20333 plus_constant(Pmode,
20334 stack_pointer_rtx,
20335 offset));
20336 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20337 RTX_FRAME_RELATED_P (tmp) = 1;
20338 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20339
20340 offset += 4;
20341 j += 1;
20342 }
20343 }
20344 else
20345 j++;
20346
20347 /* Attach dwarf info to the first insn we generate. */
20348 gcc_assert (insn != NULL_RTX);
20349 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20350 RTX_FRAME_RELATED_P (insn) = 1;
20351 }
20352
20353 /* Generate and emit an insn that we will recognize as a push_multi.
20354 Unfortunately, since this insn does not reflect very well the actual
20355 semantics of the operation, we need to annotate the insn for the benefit
20356 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20357 MASK for registers that should be annotated for DWARF2 frame unwind
20358 information. */
20359 static rtx
20360 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20361 {
20362 int num_regs = 0;
20363 int num_dwarf_regs = 0;
20364 int i, j;
20365 rtx par;
20366 rtx dwarf;
20367 int dwarf_par_index;
20368 rtx tmp, reg;
20369
20370 /* We don't record the PC in the dwarf frame information. */
20371 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20372
20373 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20374 {
20375 if (mask & (1 << i))
20376 num_regs++;
20377 if (dwarf_regs_mask & (1 << i))
20378 num_dwarf_regs++;
20379 }
20380
20381 gcc_assert (num_regs && num_regs <= 16);
20382 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20383
20384 /* For the body of the insn we are going to generate an UNSPEC in
20385 parallel with several USEs. This allows the insn to be recognized
20386 by the push_multi pattern in the arm.md file.
20387
20388 The body of the insn looks something like this:
20389
20390 (parallel [
20391 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20392 (const_int:SI <num>)))
20393 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20394 (use (reg:SI XX))
20395 (use (reg:SI YY))
20396 ...
20397 ])
20398
20399 For the frame note however, we try to be more explicit and actually
20400 show each register being stored into the stack frame, plus a (single)
20401 decrement of the stack pointer. We do it this way in order to be
20402 friendly to the stack unwinding code, which only wants to see a single
20403 stack decrement per instruction. The RTL we generate for the note looks
20404 something like this:
20405
20406 (sequence [
20407 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20408 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20409 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20410 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20411 ...
20412 ])
20413
20414 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20415 instead we'd have a parallel expression detailing all
20416 the stores to the various memory addresses so that debug
20417 information is more up-to-date. Remember however while writing
20418 this to take care of the constraints with the push instruction.
20419
20420 Note also that this has to be taken care of for the VFP registers.
20421
20422 For more see PR43399. */
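  /* As a concrete (illustrative) instance, for MASK == DWARF_REGS_MASK
     covering {r4, r5, lr} the note built below is

       (sequence [
	  (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
	  (set (mem:SI (reg:SI sp)) (reg:SI r4))
	  (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
	  (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))
       ])

     and the insn itself is typically assembled as push {r4, r5, lr}
     (stmfd sp!, {r4, r5, lr} in ARM state).  */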
20423
20424 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20425 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20426 dwarf_par_index = 1;
20427
20428 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20429 {
20430 if (mask & (1 << i))
20431 {
20432 reg = gen_rtx_REG (SImode, i);
20433
20434 XVECEXP (par, 0, 0)
20435 = gen_rtx_SET (gen_frame_mem
20436 (BLKmode,
20437 gen_rtx_PRE_MODIFY (Pmode,
20438 stack_pointer_rtx,
20439 plus_constant
20440 (Pmode, stack_pointer_rtx,
20441 -4 * num_regs))
20442 ),
20443 gen_rtx_UNSPEC (BLKmode,
20444 gen_rtvec (1, reg),
20445 UNSPEC_PUSH_MULT));
20446
20447 if (dwarf_regs_mask & (1 << i))
20448 {
20449 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20450 reg);
20451 RTX_FRAME_RELATED_P (tmp) = 1;
20452 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20453 }
20454
20455 break;
20456 }
20457 }
20458
20459 for (j = 1, i++; j < num_regs; i++)
20460 {
20461 if (mask & (1 << i))
20462 {
20463 reg = gen_rtx_REG (SImode, i);
20464
20465 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20466
20467 if (dwarf_regs_mask & (1 << i))
20468 {
20469 tmp
20470 = gen_rtx_SET (gen_frame_mem
20471 (SImode,
20472 plus_constant (Pmode, stack_pointer_rtx,
20473 4 * j)),
20474 reg);
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20476 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20477 }
20478
20479 j++;
20480 }
20481 }
20482
20483 par = emit_insn (par);
20484
20485 tmp = gen_rtx_SET (stack_pointer_rtx,
20486 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20487 RTX_FRAME_RELATED_P (tmp) = 1;
20488 XVECEXP (dwarf, 0, 0) = tmp;
20489
20490 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20491
20492 return par;
20493 }
20494
20495 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20496 SIZE is the offset to be adjusted.
20497 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20498 static void
20499 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20500 {
20501 rtx dwarf;
20502
20503 RTX_FRAME_RELATED_P (insn) = 1;
20504 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20505 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20506 }
20507
20508 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20509 SAVED_REGS_MASK shows which registers need to be restored.
20510
20511 Unfortunately, since this insn does not reflect very well the actual
20512 semantics of the operation, we need to annotate the insn for the benefit
20513 of DWARF2 frame unwind information. */
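/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, pc} the
   PARALLEL built below holds a return, the SP adjustment by 12 and the three
   register loads, and is typically assembled as

	pop	{r4, r5, pc}

   (ldmfd sp!, {r4, r5, pc} in ARM state).  */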
20514 static void
20515 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20516 {
20517 int num_regs = 0;
20518 int i, j;
20519 rtx par;
20520 rtx dwarf = NULL_RTX;
20521 rtx tmp, reg;
20522 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20523 int offset_adj;
20524 int emit_update;
20525
20526 offset_adj = return_in_pc ? 1 : 0;
20527 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20528 if (saved_regs_mask & (1 << i))
20529 num_regs++;
20530
20531 gcc_assert (num_regs && num_regs <= 16);
20532
 20533 /* If SP is in the reglist, then we don't emit an SP update insn. */
20534 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20535
20536 /* The parallel needs to hold num_regs SETs
20537 and one SET for the stack update. */
20538 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20539
20540 if (return_in_pc)
20541 XVECEXP (par, 0, 0) = ret_rtx;
20542
20543 if (emit_update)
20544 {
20545 /* Increment the stack pointer, based on there being
20546 num_regs 4-byte registers to restore. */
20547 tmp = gen_rtx_SET (stack_pointer_rtx,
20548 plus_constant (Pmode,
20549 stack_pointer_rtx,
20550 4 * num_regs));
20551 RTX_FRAME_RELATED_P (tmp) = 1;
20552 XVECEXP (par, 0, offset_adj) = tmp;
20553 }
20554
20555 /* Now restore every reg, which may include PC. */
20556 for (j = 0, i = 0; j < num_regs; i++)
20557 if (saved_regs_mask & (1 << i))
20558 {
20559 reg = gen_rtx_REG (SImode, i);
20560 if ((num_regs == 1) && emit_update && !return_in_pc)
20561 {
20562 /* Emit single load with writeback. */
20563 tmp = gen_frame_mem (SImode,
20564 gen_rtx_POST_INC (Pmode,
20565 stack_pointer_rtx));
20566 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20567 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20568 return;
20569 }
20570
20571 tmp = gen_rtx_SET (reg,
20572 gen_frame_mem
20573 (SImode,
20574 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20575 RTX_FRAME_RELATED_P (tmp) = 1;
20576 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20577
20578 /* We need to maintain a sequence for DWARF info too. As dwarf info
20579 should not have PC, skip PC. */
20580 if (i != PC_REGNUM)
20581 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20582
20583 j++;
20584 }
20585
20586 if (return_in_pc)
20587 par = emit_jump_insn (par);
20588 else
20589 par = emit_insn (par);
20590
20591 REG_NOTES (par) = dwarf;
20592 if (!return_in_pc)
20593 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20594 stack_pointer_rtx, stack_pointer_rtx);
20595 }
20596
20597 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20598 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20599
20600 Unfortunately, since this insn does not reflect very well the actual
20601 semantics of the operation, we need to annotate the insn for the benefit
20602 of DWARF2 frame unwind information. */
20603 static void
20604 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20605 {
20606 int i, j;
20607 rtx par;
20608 rtx dwarf = NULL_RTX;
20609 rtx tmp, reg;
20610
20611 gcc_assert (num_regs && num_regs <= 32);
20612
20613 /* Workaround ARM10 VFPr1 bug. */
20614 if (num_regs == 2 && !arm_arch6)
20615 {
20616 if (first_reg == 15)
20617 first_reg--;
20618
20619 num_regs++;
20620 }
20621
20622 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20623 there could be up to 32 D-registers to restore.
20624 If there are more than 16 D-registers, make two recursive calls,
20625 each of which emits one pop_multi instruction. */
20626 if (num_regs > 16)
20627 {
20628 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20629 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20630 return;
20631 }
20632
20633 /* The parallel needs to hold num_regs SETs
20634 and one SET for the stack update. */
20635 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20636
20637 /* Increment the stack pointer, based on there being
20638 num_regs 8-byte registers to restore. */
20639 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20640 RTX_FRAME_RELATED_P (tmp) = 1;
20641 XVECEXP (par, 0, 0) = tmp;
20642
20643 /* Now show every reg that will be restored, using a SET for each. */
20644 for (j = 0, i=first_reg; j < num_regs; i += 2)
20645 {
20646 reg = gen_rtx_REG (DFmode, i);
20647
20648 tmp = gen_rtx_SET (reg,
20649 gen_frame_mem
20650 (DFmode,
20651 plus_constant (Pmode, base_reg, 8 * j)));
20652 RTX_FRAME_RELATED_P (tmp) = 1;
20653 XVECEXP (par, 0, j + 1) = tmp;
20654
20655 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20656
20657 j++;
20658 }
20659
20660 par = emit_insn (par);
20661 REG_NOTES (par) = dwarf;
20662
 20663 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20664 if (REGNO (base_reg) == IP_REGNUM)
20665 {
20666 RTX_FRAME_RELATED_P (par) = 1;
20667 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20668 }
20669 else
20670 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20671 base_reg, base_reg);
20672 }
20673
 20674 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
 20675 even number of registers is being popped, LDRD patterns are created for
 20676 all register pairs. If an odd number of registers is popped, the last
 20677 register is loaded using an LDR pattern. */
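/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r6, pc} the
   expected epilogue sequence is roughly

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

   i.e. the paired registers are loaded with LDRD at fixed offsets, the stack
   is adjusted, and the leftover register is popped together with PC.  */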
20678 static void
20679 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20680 {
20681 int num_regs = 0;
20682 int i, j;
20683 rtx par = NULL_RTX;
20684 rtx dwarf = NULL_RTX;
20685 rtx tmp, reg, tmp1;
20686 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20687
20688 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20689 if (saved_regs_mask & (1 << i))
20690 num_regs++;
20691
20692 gcc_assert (num_regs && num_regs <= 16);
20693
20694 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20695 to be popped. So, if num_regs is even, now it will become odd,
20696 and we can generate pop with PC. If num_regs is odd, it will be
20697 even now, and ldr with return can be generated for PC. */
20698 if (return_in_pc)
20699 num_regs--;
20700
20701 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20702
 20703 /* Var j iterates over all the registers to gather all the registers in
 20704 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
 20705 A PARALLEL RTX of a register pair is created here, so that the pattern for
 20706 LDRD can be matched. As PC is always the last register to be popped, and
 20707 we have already decremented num_regs if PC is in the mask, we don't have to
 20708 worry about PC in this loop. */
20709 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20710 if (saved_regs_mask & (1 << j))
20711 {
20712 /* Create RTX for memory load. */
20713 reg = gen_rtx_REG (SImode, j);
20714 tmp = gen_rtx_SET (reg,
20715 gen_frame_mem (SImode,
20716 plus_constant (Pmode,
20717 stack_pointer_rtx, 4 * i)));
20718 RTX_FRAME_RELATED_P (tmp) = 1;
20719
20720 if (i % 2 == 0)
20721 {
20722 /* When saved-register index (i) is even, the RTX to be emitted is
20723 yet to be created. Hence create it first. The LDRD pattern we
20724 are generating is :
20725 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20726 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20727 where target registers need not be consecutive. */
20728 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20729 dwarf = NULL_RTX;
20730 }
20731
 20732 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
 20733 added as the 0th element, and if i is odd, reg_i is added as the 1st element
 20734 of the LDRD pattern shown above. */
20735 XVECEXP (par, 0, (i % 2)) = tmp;
20736 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20737
20738 if ((i % 2) == 1)
20739 {
20740 /* When saved-register index (i) is odd, RTXs for both the registers
20741 to be loaded are generated in above given LDRD pattern, and the
20742 pattern can be emitted now. */
20743 par = emit_insn (par);
20744 REG_NOTES (par) = dwarf;
20745 RTX_FRAME_RELATED_P (par) = 1;
20746 }
20747
20748 i++;
20749 }
20750
 20751 /* If the number of registers popped is odd and return_in_pc is false, or the
 20752 number of registers is even and return_in_pc is true, the last register is
 20753 popped using LDR. It can be PC as well. Hence, adjust the stack first and
 20754 then load it with a post-increment LDR. */
20755
20756 /* Increment the stack pointer, based on there being
20757 num_regs 4-byte registers to restore. */
20758 tmp = gen_rtx_SET (stack_pointer_rtx,
20759 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20760 RTX_FRAME_RELATED_P (tmp) = 1;
20761 tmp = emit_insn (tmp);
20762 if (!return_in_pc)
20763 {
20764 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20765 stack_pointer_rtx, stack_pointer_rtx);
20766 }
20767
20768 dwarf = NULL_RTX;
20769
20770 if (((num_regs % 2) == 1 && !return_in_pc)
20771 || ((num_regs % 2) == 0 && return_in_pc))
20772 {
20773 /* Scan for the single register to be popped. Skip until the saved
20774 register is found. */
20775 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20776
20777 /* Gen LDR with post increment here. */
20778 tmp1 = gen_rtx_MEM (SImode,
20779 gen_rtx_POST_INC (SImode,
20780 stack_pointer_rtx));
20781 set_mem_alias_set (tmp1, get_frame_alias_set ());
20782
20783 reg = gen_rtx_REG (SImode, j);
20784 tmp = gen_rtx_SET (reg, tmp1);
20785 RTX_FRAME_RELATED_P (tmp) = 1;
20786 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20787
20788 if (return_in_pc)
20789 {
20790 /* If return_in_pc, j must be PC_REGNUM. */
20791 gcc_assert (j == PC_REGNUM);
20792 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20793 XVECEXP (par, 0, 0) = ret_rtx;
20794 XVECEXP (par, 0, 1) = tmp;
20795 par = emit_jump_insn (par);
20796 }
20797 else
20798 {
20799 par = emit_insn (tmp);
20800 REG_NOTES (par) = dwarf;
20801 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20802 stack_pointer_rtx, stack_pointer_rtx);
20803 }
20804
20805 }
20806 else if ((num_regs % 2) == 1 && return_in_pc)
20807 {
20808 /* There are 2 registers to be popped. So, generate the pattern
20809 pop_multiple_with_stack_update_and_return to pop in PC. */
20810 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20811 }
20812
20813 return;
20814 }
20815
20816 /* LDRD in ARM mode needs consecutive registers as operands. This function
20817 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
 20818 offset addressing and then generates one separate stack update. This provides
20819 more scheduling freedom, compared to writeback on every load. However,
20820 if the function returns using load into PC directly
20821 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20822 before the last load. TODO: Add a peephole optimization to recognize
20823 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20824 peephole optimization to merge the load at stack-offset zero
20825 with the stack update instruction using load with writeback
20826 in post-index addressing mode. */
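/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r6} the
   expected sequence is roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   i.e. all loads use offset addressing and a single SP adjustment follows.  */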
20827 static void
20828 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20829 {
20830 int j = 0;
20831 int offset = 0;
20832 rtx par = NULL_RTX;
20833 rtx dwarf = NULL_RTX;
20834 rtx tmp, mem;
20835
20836 /* Restore saved registers. */
20837 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20838 j = 0;
20839 while (j <= LAST_ARM_REGNUM)
20840 if (saved_regs_mask & (1 << j))
20841 {
20842 if ((j % 2) == 0
20843 && (saved_regs_mask & (1 << (j + 1)))
20844 && (j + 1) != PC_REGNUM)
20845 {
20846 /* Current register and next register form register pair for which
20847 LDRD can be generated. PC is always the last register popped, and
20848 we handle it separately. */
20849 if (offset > 0)
20850 mem = gen_frame_mem (DImode,
20851 plus_constant (Pmode,
20852 stack_pointer_rtx,
20853 offset));
20854 else
20855 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20856
20857 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20858 tmp = emit_insn (tmp);
20859 RTX_FRAME_RELATED_P (tmp) = 1;
20860
20861 /* Generate dwarf info. */
20862
20863 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20864 gen_rtx_REG (SImode, j),
20865 NULL_RTX);
20866 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20867 gen_rtx_REG (SImode, j + 1),
20868 dwarf);
20869
20870 REG_NOTES (tmp) = dwarf;
20871
20872 offset += 8;
20873 j += 2;
20874 }
20875 else if (j != PC_REGNUM)
20876 {
20877 /* Emit a single word load. */
20878 if (offset > 0)
20879 mem = gen_frame_mem (SImode,
20880 plus_constant (Pmode,
20881 stack_pointer_rtx,
20882 offset));
20883 else
20884 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20885
20886 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20887 tmp = emit_insn (tmp);
20888 RTX_FRAME_RELATED_P (tmp) = 1;
20889
20890 /* Generate dwarf info. */
20891 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20892 gen_rtx_REG (SImode, j),
20893 NULL_RTX);
20894
20895 offset += 4;
20896 j += 1;
20897 }
20898 else /* j == PC_REGNUM */
20899 j++;
20900 }
20901 else
20902 j++;
20903
20904 /* Update the stack. */
20905 if (offset > 0)
20906 {
20907 tmp = gen_rtx_SET (stack_pointer_rtx,
20908 plus_constant (Pmode,
20909 stack_pointer_rtx,
20910 offset));
20911 tmp = emit_insn (tmp);
20912 arm_add_cfa_adjust_cfa_note (tmp, offset,
20913 stack_pointer_rtx, stack_pointer_rtx);
20914 offset = 0;
20915 }
20916
20917 if (saved_regs_mask & (1 << PC_REGNUM))
20918 {
20919 /* Only PC is to be popped. */
20920 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20921 XVECEXP (par, 0, 0) = ret_rtx;
20922 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20923 gen_frame_mem (SImode,
20924 gen_rtx_POST_INC (SImode,
20925 stack_pointer_rtx)));
20926 RTX_FRAME_RELATED_P (tmp) = 1;
20927 XVECEXP (par, 0, 1) = tmp;
20928 par = emit_jump_insn (par);
20929
20930 /* Generate dwarf info. */
20931 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20932 gen_rtx_REG (SImode, PC_REGNUM),
20933 NULL_RTX);
20934 REG_NOTES (par) = dwarf;
20935 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20936 stack_pointer_rtx, stack_pointer_rtx);
20937 }
20938 }
20939
20940 /* Calculate the size of the return value that is passed in registers. */
20941 static unsigned
20942 arm_size_return_regs (void)
20943 {
20944 machine_mode mode;
20945
20946 if (crtl->return_rtx != 0)
20947 mode = GET_MODE (crtl->return_rtx);
20948 else
20949 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20950
20951 return GET_MODE_SIZE (mode);
20952 }
20953
20954 /* Return true if the current function needs to save/restore LR. */
20955 static bool
20956 thumb_force_lr_save (void)
20957 {
20958 return !cfun->machine->lr_save_eliminated
20959 && (!crtl->is_leaf
20960 || thumb_far_jump_used_p ()
20961 || df_regs_ever_live_p (LR_REGNUM));
20962 }
20963
 20964 /* We do not know whether r3 will be available, because
 20965 an indirect tailcall is happening in this
 20966 particular case. */
20967 static bool
20968 is_indirect_tailcall_p (rtx call)
20969 {
20970 rtx pat = PATTERN (call);
20971
20972 /* Indirect tail call. */
20973 pat = XVECEXP (pat, 0, 0);
20974 if (GET_CODE (pat) == SET)
20975 pat = SET_SRC (pat);
20976
20977 pat = XEXP (XEXP (pat, 0), 0);
20978 return REG_P (pat);
20979 }
20980
20981 /* Return true if r3 is used by any of the tail call insns in the
20982 current function. */
20983 static bool
20984 any_sibcall_could_use_r3 (void)
20985 {
20986 edge_iterator ei;
20987 edge e;
20988
20989 if (!crtl->tail_call_emit)
20990 return false;
20991 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20992 if (e->flags & EDGE_SIBCALL)
20993 {
20994 rtx_insn *call = BB_END (e->src);
20995 if (!CALL_P (call))
20996 call = prev_nonnote_nondebug_insn (call);
20997 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20998 if (find_regno_fusage (call, USE, 3)
20999 || is_indirect_tailcall_p (call))
21000 return true;
21001 }
21002 return false;
21003 }
21004
21005
21006 /* Compute the distance from register FROM to register TO.
21007 These can be the arg pointer (26), the soft frame pointer (25),
21008 the stack pointer (13) or the hard frame pointer (11).
21009 In thumb mode r7 is used as the soft frame pointer, if needed.
21010 Typical stack layout looks like this:
21011
 21012       old stack pointer -> |    |
 21013                             ----
 21014                            |    | \
 21015                            |    |   saved arguments for
 21016                            |    |   vararg functions
 21017                            |    | /
 21018                              --
 21019   hard FP & arg pointer -> |    | \
 21020                            |    |   stack
 21021                            |    |   frame
 21022                            |    | /
 21023                              --
 21024                            |    | \
 21025                            |    |   call saved
 21026                            |    |   registers
 21027      soft frame pointer -> |    | /
 21028                              --
 21029                            |    | \
 21030                            |    |   local
 21031                            |    |   variables
 21032     locals base pointer -> |    | /
 21033                              --
 21034                            |    | \
 21035                            |    |   outgoing
 21036                            |    |   arguments
 21037   current stack pointer -> |    | /
 21038                              --
21039
21040 For a given function some or all of these stack components
21041 may not be needed, giving rise to the possibility of
21042 eliminating some of the registers.
21043
21044 The values returned by this function must reflect the behavior
21045 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21046
21047 The sign of the number returned reflects the direction of stack
21048 growth, so the values are positive for all eliminations except
21049 from the soft frame pointer to the hard frame pointer.
21050
21051 SFP may point just inside the local variables block to ensure correct
21052 alignment. */
21053
21054
21055 /* Return cached stack offsets. */
21056
21057 static arm_stack_offsets *
21058 arm_get_frame_offsets (void)
21059 {
21060 struct arm_stack_offsets *offsets;
21061
21062 offsets = &cfun->machine->stack_offsets;
21063
21064 return offsets;
21065 }
21066
21067
21068 /* Calculate stack offsets. These are used to calculate register elimination
21069 offsets and in prologue/epilogue code. Also calculates which registers
21070 should be saved. */
21071
21072 static void
21073 arm_compute_frame_layout (void)
21074 {
21075 struct arm_stack_offsets *offsets;
21076 unsigned long func_type;
21077 int saved;
21078 int core_saved;
21079 HOST_WIDE_INT frame_size;
21080 int i;
21081
21082 offsets = &cfun->machine->stack_offsets;
21083
 21084 /* Initially this is the size of the local variables. It will be translated
 21085 into an offset once we have determined the size of the preceding data. */
21086 frame_size = ROUND_UP_WORD (get_frame_size ());
21087
21088 /* Space for variadic functions. */
21089 offsets->saved_args = crtl->args.pretend_args_size;
21090
21091 /* In Thumb mode this is incorrect, but never used. */
21092 offsets->frame
21093 = (offsets->saved_args
21094 + arm_compute_static_chain_stack_bytes ()
21095 + (frame_pointer_needed ? 4 : 0));
21096
21097 if (TARGET_32BIT)
21098 {
21099 unsigned int regno;
21100
21101 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21102 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21103 saved = core_saved;
21104
21105 /* We know that SP will be doubleword aligned on entry, and we must
21106 preserve that condition at any subroutine call. We also require the
21107 soft frame pointer to be doubleword aligned. */
21108
21109 if (TARGET_REALLY_IWMMXT)
21110 {
21111 /* Check for the call-saved iWMMXt registers. */
21112 for (regno = FIRST_IWMMXT_REGNUM;
21113 regno <= LAST_IWMMXT_REGNUM;
21114 regno++)
21115 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21116 saved += 8;
21117 }
21118
21119 func_type = arm_current_func_type ();
21120 /* Space for saved VFP registers. */
21121 if (! IS_VOLATILE (func_type)
21122 && TARGET_HARD_FLOAT)
21123 saved += arm_get_vfp_saved_size ();
21124 }
21125 else /* TARGET_THUMB1 */
21126 {
21127 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21128 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21129 saved = core_saved;
21130 if (TARGET_BACKTRACE)
21131 saved += 16;
21132 }
21133
21134 /* Saved registers include the stack frame. */
21135 offsets->saved_regs
21136 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21137 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
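  /* Illustrative example (assuming no pretend args, no static chain slot, no
     VFP or iWMMXt saves and CALLER_INTERWORKING_SLOT_SIZE == 0): a
     frame-pointer-using function that saves {r4, r5, fp, lr} gets saved_args
     == 0, frame == 4, saved_regs == 16 and soft_frame == 16; with 8 bytes of
     locals and no outgoing arguments, locals_base and outgoing_args computed
     below both end up as 24.  */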
21138
21139 /* A leaf function does not need any stack alignment if it has nothing
21140 on the stack. */
21141 if (crtl->is_leaf && frame_size == 0
21142 /* However if it calls alloca(), we have a dynamically allocated
21143 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21144 && ! cfun->calls_alloca)
21145 {
21146 offsets->outgoing_args = offsets->soft_frame;
21147 offsets->locals_base = offsets->soft_frame;
21148 return;
21149 }
21150
21151 /* Ensure SFP has the correct alignment. */
21152 if (ARM_DOUBLEWORD_ALIGN
21153 && (offsets->soft_frame & 7))
21154 {
21155 offsets->soft_frame += 4;
21156 /* Try to align stack by pushing an extra reg. Don't bother doing this
21157 when there is a stack frame as the alignment will be rolled into
21158 the normal stack adjustment. */
21159 if (frame_size + crtl->outgoing_args_size == 0)
21160 {
21161 int reg = -1;
21162
21163 /* Register r3 is caller-saved. Normally it does not need to be
21164 saved on entry by the prologue. However if we choose to save
21165 it for padding then we may confuse the compiler into thinking
21166 a prologue sequence is required when in fact it is not. This
21167 will occur when shrink-wrapping if r3 is used as a scratch
21168 register and there are no other callee-saved writes.
21169
 21170 This situation can be avoided when other callee-saved registers are
 21171 available, since r3 is not mandatory: we can choose a callee-saved
 21172 register for the padding instead. */
21173 bool prefer_callee_reg_p = false;
21174
21175 /* If it is safe to use r3, then do so. This sometimes
21176 generates better code on Thumb-2 by avoiding the need to
21177 use 32-bit push/pop instructions. */
21178 if (! any_sibcall_could_use_r3 ()
21179 && arm_size_return_regs () <= 12
21180 && (offsets->saved_regs_mask & (1 << 3)) == 0
21181 && (TARGET_THUMB2
21182 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21183 {
21184 reg = 3;
21185 if (!TARGET_THUMB2)
21186 prefer_callee_reg_p = true;
21187 }
21188 if (reg == -1
21189 || prefer_callee_reg_p)
21190 {
21191 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21192 {
21193 /* Avoid fixed registers; they may be changed at
21194 arbitrary times so it's unsafe to restore them
21195 during the epilogue. */
21196 if (!fixed_regs[i]
21197 && (offsets->saved_regs_mask & (1 << i)) == 0)
21198 {
21199 reg = i;
21200 break;
21201 }
21202 }
21203 }
21204
21205 if (reg != -1)
21206 {
21207 offsets->saved_regs += 4;
21208 offsets->saved_regs_mask |= (1 << reg);
21209 }
21210 }
21211 }
21212
21213 offsets->locals_base = offsets->soft_frame + frame_size;
21214 offsets->outgoing_args = (offsets->locals_base
21215 + crtl->outgoing_args_size);
21216
21217 if (ARM_DOUBLEWORD_ALIGN)
21218 {
21219 /* Ensure SP remains doubleword aligned. */
21220 if (offsets->outgoing_args & 7)
21221 offsets->outgoing_args += 4;
21222 gcc_assert (!(offsets->outgoing_args & 7));
21223 }
21224 }
21225
21226
21227 /* Calculate the relative offsets for the different stack pointers. Positive
21228 offsets are in the direction of stack growth. */
21229
21230 HOST_WIDE_INT
21231 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21232 {
21233 arm_stack_offsets *offsets;
21234
21235 offsets = arm_get_frame_offsets ();
21236
21237 /* OK, now we have enough information to compute the distances.
21238 There must be an entry in these switch tables for each pair
21239 of registers in ELIMINABLE_REGS, even if some of the entries
21240 seem to be redundant or useless. */
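  /* Continuing the illustrative example from arm_compute_frame_layout
     (saved_args == 0, frame == 4, soft_frame == 16, outgoing_args == 24):
     eliminating ARG_POINTER into STACK_POINTER yields 24 - (0 + 4) == 20,
     while eliminating FRAME_POINTER into ARM_HARD_FRAME_POINTER yields
     4 - 16 == -12, the one negative direction noted earlier.  */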
21241 switch (from)
21242 {
21243 case ARG_POINTER_REGNUM:
21244 switch (to)
21245 {
21246 case THUMB_HARD_FRAME_POINTER_REGNUM:
21247 return 0;
21248
21249 case FRAME_POINTER_REGNUM:
21250 /* This is the reverse of the soft frame pointer
21251 to hard frame pointer elimination below. */
21252 return offsets->soft_frame - offsets->saved_args;
21253
21254 case ARM_HARD_FRAME_POINTER_REGNUM:
21255 /* This is only non-zero in the case where the static chain register
21256 is stored above the frame. */
21257 return offsets->frame - offsets->saved_args - 4;
21258
21259 case STACK_POINTER_REGNUM:
21260 /* If nothing has been pushed on the stack at all
21261 then this will return -4. This *is* correct! */
21262 return offsets->outgoing_args - (offsets->saved_args + 4);
21263
21264 default:
21265 gcc_unreachable ();
21266 }
21267 gcc_unreachable ();
21268
21269 case FRAME_POINTER_REGNUM:
21270 switch (to)
21271 {
21272 case THUMB_HARD_FRAME_POINTER_REGNUM:
21273 return 0;
21274
21275 case ARM_HARD_FRAME_POINTER_REGNUM:
21276 /* The hard frame pointer points to the top entry in the
 21277 stack frame. The soft frame pointer points to the bottom entry
21278 in the stack frame. If there is no stack frame at all,
21279 then they are identical. */
21280
21281 return offsets->frame - offsets->soft_frame;
21282
21283 case STACK_POINTER_REGNUM:
21284 return offsets->outgoing_args - offsets->soft_frame;
21285
21286 default:
21287 gcc_unreachable ();
21288 }
21289 gcc_unreachable ();
21290
21291 default:
21292 /* You cannot eliminate from the stack pointer.
21293 In theory you could eliminate from the hard frame
21294 pointer to the stack pointer, but this will never
21295 happen, since if a stack frame is not needed the
21296 hard frame pointer will never be used. */
21297 gcc_unreachable ();
21298 }
21299 }
21300
21301 /* Given FROM and TO register numbers, say whether this elimination is
21302 allowed. Frame pointer elimination is automatically handled.
21303
21304 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21305 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21306 pointer, we must eliminate FRAME_POINTER_REGNUM into
21307 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21308 ARG_POINTER_REGNUM. */
21309
21310 bool
21311 arm_can_eliminate (const int from, const int to)
21312 {
21313 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21314 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21315 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21316 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21317 true);
21318 }
21319
21320 /* Emit RTL to save coprocessor registers on function entry. Returns the
21321 number of bytes pushed. */
21322
21323 static int
21324 arm_save_coproc_regs(void)
21325 {
21326 int saved_size = 0;
21327 unsigned reg;
21328 unsigned start_reg;
21329 rtx insn;
21330
21331 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21332 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21333 {
21334 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21335 insn = gen_rtx_MEM (V2SImode, insn);
21336 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21337 RTX_FRAME_RELATED_P (insn) = 1;
21338 saved_size += 8;
21339 }
21340
21341 if (TARGET_HARD_FLOAT)
21342 {
21343 start_reg = FIRST_VFP_REGNUM;
21344
21345 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21346 {
21347 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21348 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21349 {
21350 if (start_reg != reg)
21351 saved_size += vfp_emit_fstmd (start_reg,
21352 (reg - start_reg) / 2);
21353 start_reg = reg + 2;
21354 }
21355 }
21356 if (start_reg != reg)
21357 saved_size += vfp_emit_fstmd (start_reg,
21358 (reg - start_reg) / 2);
21359 }
21360 return saved_size;
21361 }
21362
21363
21364 /* Set the Thumb frame pointer from the stack pointer. */
21365
21366 static void
21367 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21368 {
21369 HOST_WIDE_INT amount;
21370 rtx insn, dwarf;
21371
21372 amount = offsets->outgoing_args - offsets->locals_base;
21373 if (amount < 1024)
21374 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21375 stack_pointer_rtx, GEN_INT (amount)));
21376 else
21377 {
21378 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21379 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21380 expects the first two operands to be the same. */
21381 if (TARGET_THUMB2)
21382 {
21383 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21384 stack_pointer_rtx,
21385 hard_frame_pointer_rtx));
21386 }
21387 else
21388 {
21389 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21390 hard_frame_pointer_rtx,
21391 stack_pointer_rtx));
21392 }
21393 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21394 plus_constant (Pmode, stack_pointer_rtx, amount));
21395 RTX_FRAME_RELATED_P (dwarf) = 1;
21396 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21397 }
21398
21399 RTX_FRAME_RELATED_P (insn) = 1;
21400 }
21401
21402 struct scratch_reg {
21403 rtx reg;
21404 bool saved;
21405 };
21406
21407 /* Return a short-lived scratch register for use as a 2nd scratch register on
21408 function entry after the registers are saved in the prologue. This register
21409 must be released by means of release_scratch_register_on_entry. IP is not
21410 considered since it is always used as the 1st scratch register if available.
21411
21412 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21413 mask of live registers. */
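/* Descriptive note on the selection below: LR is preferred if it is in
   LIVE_REGS and is not REGNO1; otherwise the first register in r4-r10 that is
   in LIVE_REGS and is not REGNO1 is used; failing that, r2 or r3 is chosen,
   and is saved and restored around the probe sequence if it is live on entry
   to the function.  */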
21414
21415 static void
21416 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21417 unsigned long live_regs)
21418 {
21419 int regno = -1;
21420
21421 sr->saved = false;
21422
21423 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21424 regno = LR_REGNUM;
21425 else
21426 {
21427 unsigned int i;
21428
21429 for (i = 4; i < 11; i++)
21430 if (regno1 != i && (live_regs & (1 << i)) != 0)
21431 {
21432 regno = i;
21433 break;
21434 }
21435
21436 if (regno < 0)
21437 {
21438 /* If IP is used as the 1st scratch register for a nested function,
 21439 then either r3 wasn't available or it is used to preserve IP. */
21440 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21441 regno1 = 3;
21442 regno = (regno1 == 3 ? 2 : 3);
21443 sr->saved
21444 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21445 regno);
21446 }
21447 }
21448
21449 sr->reg = gen_rtx_REG (SImode, regno);
21450 if (sr->saved)
21451 {
21452 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21453 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21454 rtx x = gen_rtx_SET (stack_pointer_rtx,
21455 plus_constant (Pmode, stack_pointer_rtx, -4));
21456 RTX_FRAME_RELATED_P (insn) = 1;
21457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21458 }
21459 }
21460
21461 /* Release a scratch register obtained from the preceding function. */
21462
21463 static void
21464 release_scratch_register_on_entry (struct scratch_reg *sr)
21465 {
21466 if (sr->saved)
21467 {
21468 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21469 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21470 rtx x = gen_rtx_SET (stack_pointer_rtx,
21471 plus_constant (Pmode, stack_pointer_rtx, 4));
21472 RTX_FRAME_RELATED_P (insn) = 1;
21473 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21474 }
21475 }
21476
21477 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21478
21479 #if PROBE_INTERVAL > 4096
21480 #error Cannot use indexed addressing mode for stack probing
21481 #endif
21482
21483 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21484 inclusive. These are offsets from the current stack pointer. REGNO1
21485 is the index number of the 1st scratch register and LIVE_REGS is the
21486 mask of live registers. */
21487
21488 static void
21489 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21490 unsigned int regno1, unsigned long live_regs)
21491 {
21492 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21493
21494 /* See if we have a constant small number of probes to generate. If so,
21495 that's the easy case. */
21496 if (size <= PROBE_INTERVAL)
21497 {
21498 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21499 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21500 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21501 }
21502
21503 /* The run-time loop is made up of 10 insns in the generic case while the
21504 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21505 else if (size <= 5 * PROBE_INTERVAL)
21506 {
21507 HOST_WIDE_INT i, rem;
21508
21509 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21510 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21511 emit_stack_probe (reg1);
21512
21513 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21514 it exceeds SIZE. If only two probes are needed, this will not
21515 generate any code. Then probe at FIRST + SIZE. */
21516 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21517 {
21518 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21519 emit_stack_probe (reg1);
21520 }
21521
21522 rem = size - (i - PROBE_INTERVAL);
21523 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21524 {
21525 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21526 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21527 }
21528 else
21529 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21530 }
21531
21532 /* Otherwise, do the same as above, but in a loop. Note that we must be
21533 extra careful with variables wrapping around because we might be at
21534 the very top (or the very bottom) of the address space and we have
21535 to be able to handle this case properly; in particular, we use an
21536 equality test for the loop condition. */
21537 else
21538 {
21539 HOST_WIDE_INT rounded_size;
21540 struct scratch_reg sr;
21541
21542 get_scratch_register_on_entry (&sr, regno1, live_regs);
21543
21544 emit_move_insn (reg1, GEN_INT (first));
21545
21546
21547 /* Step 1: round SIZE to the previous multiple of the interval. */
21548
21549 rounded_size = size & -PROBE_INTERVAL;
21550 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21551
21552
21553 /* Step 2: compute initial and final value of the loop counter. */
21554
21555 /* TEST_ADDR = SP + FIRST. */
21556 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21557
21558 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21559 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21560
21561
21562 /* Step 3: the loop
21563
21564 do
21565 {
21566 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21567 probe at TEST_ADDR
21568 }
21569 while (TEST_ADDR != LAST_ADDR)
21570
21571 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21572 until it is equal to ROUNDED_SIZE. */
21573
21574 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21575
21576
21577 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21578 that SIZE is equal to ROUNDED_SIZE. */
21579
21580 if (size != rounded_size)
21581 {
21582 HOST_WIDE_INT rem = size - rounded_size;
21583
21584 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21585 {
21586 emit_set_insn (sr.reg,
21587 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21588 emit_stack_probe (plus_constant (Pmode, sr.reg,
21589 PROBE_INTERVAL - rem));
21590 }
21591 else
21592 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21593 }
21594
21595 release_scratch_register_on_entry (&sr);
21596 }
21597
21598 /* Make sure nothing is scheduled before we are done. */
21599 emit_insn (gen_blockage ());
21600 }
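
/* For example, with PROBE_INTERVAL == 4096, FIRST == 4096 and SIZE == 9000,
   the second case above emits probes at SP - 8192, SP - 12288 and finally
   SP - 13096 (i.e. SP - FIRST - SIZE), using only reg1 and small constant
   adjustments; only the looping case needs the extra scratch register.  */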
21601
21602 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21603 absolute addresses. */
21604
21605 const char *
21606 output_probe_stack_range (rtx reg1, rtx reg2)
21607 {
21608 static int labelno = 0;
21609 char loop_lab[32];
21610 rtx xops[2];
21611
21612 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21613
21614 /* Loop. */
21615 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21616
21617 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21618 xops[0] = reg1;
21619 xops[1] = GEN_INT (PROBE_INTERVAL);
21620 output_asm_insn ("sub\t%0, %0, %1", xops);
21621
21622 /* Probe at TEST_ADDR. */
21623 output_asm_insn ("str\tr0, [%0, #0]", xops);
21624
21625 /* Test if TEST_ADDR == LAST_ADDR. */
21626 xops[1] = reg2;
21627 output_asm_insn ("cmp\t%0, %1", xops);
21628
21629 /* Branch. */
21630 fputs ("\tbne\t", asm_out_file);
21631 assemble_name_raw (asm_out_file, loop_lab);
21632 fputc ('\n', asm_out_file);
21633
21634 return "";
21635 }
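
/* The loop emitted above looks something like this (assuming reg1 is r4,
   reg2 is r5 and PROBE_INTERVAL is 4096):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0

   r0 is only used as a store source; its value does not matter.  */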
21636
21637 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21638 function. */
21639 void
21640 arm_expand_prologue (void)
21641 {
21642 rtx amount;
21643 rtx insn;
21644 rtx ip_rtx;
21645 unsigned long live_regs_mask;
21646 unsigned long func_type;
21647 int fp_offset = 0;
21648 int saved_pretend_args = 0;
21649 int saved_regs = 0;
21650 unsigned HOST_WIDE_INT args_to_push;
21651 HOST_WIDE_INT size;
21652 arm_stack_offsets *offsets;
21653 bool clobber_ip;
21654
21655 func_type = arm_current_func_type ();
21656
21657 /* Naked functions don't have prologues. */
21658 if (IS_NAKED (func_type))
21659 {
21660 if (flag_stack_usage_info)
21661 current_function_static_stack_size = 0;
21662 return;
21663 }
21664
21665 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21666 args_to_push = crtl->args.pretend_args_size;
21667
21668 /* Compute which registers we will have to save onto the stack. */
21669 offsets = arm_get_frame_offsets ();
21670 live_regs_mask = offsets->saved_regs_mask;
21671
21672 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21673
21674 if (IS_STACKALIGN (func_type))
21675 {
21676 rtx r0, r1;
21677
21678 /* Handle a word-aligned stack pointer. We generate the following:
21679
21680 mov r0, sp
21681 bic r1, r0, #7
21682 mov sp, r1
21683 <save and restore r0 in normal prologue/epilogue>
21684 mov sp, r0
21685 bx lr
21686
21687 The unwinder doesn't need to know about the stack realignment.
21688 Just tell it we saved SP in r0. */
21689 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21690
21691 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21692 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21693
21694 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21695 RTX_FRAME_RELATED_P (insn) = 1;
21696 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21697
21698 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21699
21700 /* ??? The CFA changes here, which may cause GDB to conclude that it
21701 has entered a different function. That said, the unwind info is
21702 correct, individually, before and after this instruction because
21703 we've described the save of SP, which will override the default
21704 handling of SP as restoring from the CFA. */
21705 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21706 }
21707
21708 /* The static chain register is the same as the IP register. If it is
21709 clobbered when creating the frame, we need to save and restore it. */
21710 clobber_ip = IS_NESTED (func_type)
21711 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21712 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21713 || flag_stack_clash_protection)
21714 && !df_regs_ever_live_p (LR_REGNUM)
21715 && arm_r3_live_at_start_p ()));
21716
21717 /* Find somewhere to store IP whilst the frame is being created.
21718 We try the following places in order:
21719
21720 1. The last argument register r3 if it is available.
21721 2. A slot on the stack above the frame if there are no
21722 arguments to push onto the stack.
21723 3. Register r3 again, after pushing the argument registers
21724 onto the stack, if this is a varargs function.
21725 4. The last slot on the stack created for the arguments to
21726 push, if this isn't a varargs function.
21727
21728 Note - we only need to tell the dwarf2 backend about the SP
21729 adjustment in the second variant; the static chain register
21730 doesn't need to be unwound, as it doesn't contain a value
21731 inherited from the caller. */
21732 if (clobber_ip)
21733 {
21734 if (!arm_r3_live_at_start_p ())
21735 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21736 else if (args_to_push == 0)
21737 {
21738 rtx addr, dwarf;
21739
21740 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21741 saved_regs += 4;
21742
21743 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21744 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21745 fp_offset = 4;
21746
21747 /* Just tell the dwarf backend that we adjusted SP. */
21748 dwarf = gen_rtx_SET (stack_pointer_rtx,
21749 plus_constant (Pmode, stack_pointer_rtx,
21750 -fp_offset));
21751 RTX_FRAME_RELATED_P (insn) = 1;
21752 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21753 }
21754 else
21755 {
21756 /* Store the args on the stack. */
21757 if (cfun->machine->uses_anonymous_args)
21758 {
21759 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21760 (0xf0 >> (args_to_push / 4)) & 0xf);
21761 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21762 saved_pretend_args = 1;
21763 }
21764 else
21765 {
21766 rtx addr, dwarf;
21767
21768 if (args_to_push == 4)
21769 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21770 else
21771 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21772 plus_constant (Pmode,
21773 stack_pointer_rtx,
21774 -args_to_push));
21775
21776 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21777
21778 /* Just tell the dwarf backend that we adjusted SP. */
21779 dwarf = gen_rtx_SET (stack_pointer_rtx,
21780 plus_constant (Pmode, stack_pointer_rtx,
21781 -args_to_push));
21782 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21783 }
21784
21785 RTX_FRAME_RELATED_P (insn) = 1;
21786 fp_offset = args_to_push;
21787 args_to_push = 0;
21788 }
21789 }
21790
21791 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21792 {
21793 if (IS_INTERRUPT (func_type))
21794 {
21795 /* Interrupt functions must not corrupt any registers.
21796 Creating a frame pointer, however, corrupts the IP
21797 register, so we must push it first. */
21798 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21799
21800 /* Do not set RTX_FRAME_RELATED_P on this insn.
21801 The dwarf stack unwinding code only wants to see one
21802 stack decrement per function, and this is not it. If
21803 this instruction is labeled as being part of the frame
21804 creation sequence then dwarf2out_frame_debug_expr will
21805 die when it encounters the assignment of IP to FP
21806 later on, since the use of SP here establishes SP as
21807 the CFA register and not IP.
21808
21809 Anyway this instruction is not really part of the stack
21810 frame creation although it is part of the prologue. */
21811 }
21812
21813 insn = emit_set_insn (ip_rtx,
21814 plus_constant (Pmode, stack_pointer_rtx,
21815 fp_offset));
21816 RTX_FRAME_RELATED_P (insn) = 1;
21817 }
21818
21819 if (args_to_push)
21820 {
21821 /* Push the argument registers, or reserve space for them. */
21822 if (cfun->machine->uses_anonymous_args)
21823 insn = emit_multi_reg_push
21824 ((0xf0 >> (args_to_push / 4)) & 0xf,
21825 (0xf0 >> (args_to_push / 4)) & 0xf);
21826 else
21827 insn = emit_insn
21828 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21829 GEN_INT (- args_to_push)));
21830 RTX_FRAME_RELATED_P (insn) = 1;
21831 }
21832
21833 /* If this is an interrupt service routine, and the link register
21834 is going to be pushed, and we're not generating an extra
21835 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21836 subtracting four from LR now will mean that the function return
21837 can be done with a single instruction. */
21838 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21839 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21840 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21841 && TARGET_ARM)
21842 {
21843 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21844
21845 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21846 }
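
/* E.g. instead of ending the handler with "ldmfd sp!, {..., lr}" followed by
   "subs pc, lr, #4", the epilogue can then pop the pre-adjusted value
   straight into the program counter with a single "ldmfd sp!, {..., pc}^".  */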
21847
21848 if (live_regs_mask)
21849 {
21850 unsigned long dwarf_regs_mask = live_regs_mask;
21851
21852 saved_regs += bit_count (live_regs_mask) * 4;
21853 if (optimize_size && !frame_pointer_needed
21854 && saved_regs == offsets->saved_regs - offsets->saved_args)
21855 {
21856 /* If no coprocessor registers are being pushed and we don't have
21857 to worry about a frame pointer then push extra registers to
21858 create the stack frame. This is done in a way that does not
21859 alter the frame layout, so it is independent of the epilogue. */
21860 int n;
21861 int frame;
21862 n = 0;
21863 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21864 n++;
21865 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21866 if (frame && n * 4 >= frame)
21867 {
21868 n = frame / 4;
21869 live_regs_mask |= (1 << n) - 1;
21870 saved_regs += frame;
21871 }
21872 }
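
/* For example, if the lowest live core register is r4 (n == 4) and 8 bytes
   of frame are needed, the mask gains r0 and r1: pushing two extra (dead)
   registers allocates the frame as part of the existing push and saves a
   separate "sub sp, sp, #8".  */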
21873
21874 if (TARGET_LDRD
21875 && current_tune->prefer_ldrd_strd
21876 && !optimize_function_for_size_p (cfun))
21877 {
21878 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21879 if (TARGET_THUMB2)
21880 thumb2_emit_strd_push (live_regs_mask);
21881 else if (TARGET_ARM
21882 && !TARGET_APCS_FRAME
21883 && !IS_INTERRUPT (func_type))
21884 arm_emit_strd_push (live_regs_mask);
21885 else
21886 {
21887 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21888 RTX_FRAME_RELATED_P (insn) = 1;
21889 }
21890 }
21891 else
21892 {
21893 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21894 RTX_FRAME_RELATED_P (insn) = 1;
21895 }
21896 }
21897
21898 if (! IS_VOLATILE (func_type))
21899 saved_regs += arm_save_coproc_regs ();
21900
21901 if (frame_pointer_needed && TARGET_ARM)
21902 {
21903 /* Create the new frame pointer. */
21904 if (TARGET_APCS_FRAME)
21905 {
21906 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21907 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21908 RTX_FRAME_RELATED_P (insn) = 1;
21909 }
21910 else
21911 {
21912 insn = GEN_INT (saved_regs - (4 + fp_offset));
21913 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21914 stack_pointer_rtx, insn));
21915 RTX_FRAME_RELATED_P (insn) = 1;
21916 }
21917 }
21918
21919 size = offsets->outgoing_args - offsets->saved_args;
21920 if (flag_stack_usage_info)
21921 current_function_static_stack_size = size;
21922
21923 /* If this isn't an interrupt service routine and we have a frame, then do
21924 stack checking. We use IP as the first scratch register, except for the
21925 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21926 if (!IS_INTERRUPT (func_type)
21927 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21928 || flag_stack_clash_protection))
21929 {
21930 unsigned int regno;
21931
21932 if (!IS_NESTED (func_type) || clobber_ip)
21933 regno = IP_REGNUM;
21934 else if (df_regs_ever_live_p (LR_REGNUM))
21935 regno = LR_REGNUM;
21936 else
21937 regno = 3;
21938
21939 if (crtl->is_leaf && !cfun->calls_alloca)
21940 {
21941 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21942 arm_emit_probe_stack_range (get_stack_check_protect (),
21943 size - get_stack_check_protect (),
21944 regno, live_regs_mask);
21945 }
21946 else if (size > 0)
21947 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21948 regno, live_regs_mask);
21949 }
21950
21951 /* Recover the static chain register. */
21952 if (clobber_ip)
21953 {
21954 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21955 insn = gen_rtx_REG (SImode, 3);
21956 else
21957 {
21958 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21959 insn = gen_frame_mem (SImode, insn);
21960 }
21961 emit_set_insn (ip_rtx, insn);
21962 emit_insn (gen_force_register_use (ip_rtx));
21963 }
21964
21965 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21966 {
21967 /* This add can produce multiple insns for a large constant, so we
21968 need to get tricky. */
21969 rtx_insn *last = get_last_insn ();
21970
21971 amount = GEN_INT (offsets->saved_args + saved_regs
21972 - offsets->outgoing_args);
21973
21974 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21975 amount));
21976 do
21977 {
21978 last = last ? NEXT_INSN (last) : get_insns ();
21979 RTX_FRAME_RELATED_P (last) = 1;
21980 }
21981 while (last != insn);
21982
21983 /* If the frame pointer is needed, emit a special barrier that
21984 will prevent the scheduler from moving stores to the frame
21985 before the stack adjustment. */
21986 if (frame_pointer_needed)
21987 emit_insn (gen_stack_tie (stack_pointer_rtx,
21988 hard_frame_pointer_rtx));
21989 }
21990
21991
21992 if (frame_pointer_needed && TARGET_THUMB2)
21993 thumb_set_frame_pointer (offsets);
21994
21995 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21996 {
21997 unsigned long mask;
21998
21999 mask = live_regs_mask;
22000 mask &= THUMB2_WORK_REGS;
22001 if (!IS_NESTED (func_type))
22002 mask |= (1 << IP_REGNUM);
22003 arm_load_pic_register (mask);
22004 }
22005
22006 /* If we are profiling, make sure no instructions are scheduled before
22007 the call to mcount. Likewise if the user has requested no
22008 scheduling in the prologue, and likewise if we want non-call exceptions
22009 using the EABI unwinder, to prevent faulting instructions from being
22010 swapped with a stack adjustment. */
22011 if (crtl->profile || !TARGET_SCHED_PROLOG
22012 || (arm_except_unwind_info (&global_options) == UI_TARGET
22013 && cfun->can_throw_non_call_exceptions))
22014 emit_insn (gen_blockage ());
22015
22016 /* If the link register is being kept alive, with the return address in it,
22017 then make sure that it does not get reused by the ce2 pass. */
22018 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22019 cfun->machine->lr_save_eliminated = 1;
22020 }
22021 \f
22022 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22023 static void
22024 arm_print_condition (FILE *stream)
22025 {
22026 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22027 {
22028 /* Branch conversion is not implemented for Thumb-2. */
22029 if (TARGET_THUMB)
22030 {
22031 output_operand_lossage ("predicated Thumb instruction");
22032 return;
22033 }
22034 if (current_insn_predicate != NULL)
22035 {
22036 output_operand_lossage
22037 ("predicated instruction in conditional sequence");
22038 return;
22039 }
22040
22041 fputs (arm_condition_codes[arm_current_cc], stream);
22042 }
22043 else if (current_insn_predicate)
22044 {
22045 enum arm_cond_code code;
22046
22047 if (TARGET_THUMB1)
22048 {
22049 output_operand_lossage ("predicated Thumb instruction");
22050 return;
22051 }
22052
22053 code = get_arm_condition_code (current_insn_predicate);
22054 fputs (arm_condition_codes[code], stream);
22055 }
22056 }
22057
22058
22059 /* Globally reserved letters: acln
22060 Punctuation letters currently used: @_|?().!#
22061 Lower case letters currently used: bcdefhimpqtvwxyz
22062 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22063 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22064
22065 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22066
22067 If CODE is 'd', then X is a condition operand and the instruction
22068 should only be executed if the condition is true.
22069 If CODE is 'D', then X is a condition operand and the instruction
22070 should only be executed if the condition is false: however, if the mode
22071 of the comparison is CCFPEmode, then always execute the instruction -- we
22072 do this because in these circumstances !GE does not necessarily imply LT;
22073 in these cases the instruction pattern will take care to make sure that
22074 an instruction containing %d will follow, thereby undoing the effects of
22075 doing this instruction unconditionally.
22076 If CODE is 'N' then X is a floating point operand that must be negated
22077 before output.
22078 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22079 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22080 static void
22081 arm_print_operand (FILE *stream, rtx x, int code)
22082 {
22083 switch (code)
22084 {
22085 case '@':
22086 fputs (ASM_COMMENT_START, stream);
22087 return;
22088
22089 case '_':
22090 fputs (user_label_prefix, stream);
22091 return;
22092
22093 case '|':
22094 fputs (REGISTER_PREFIX, stream);
22095 return;
22096
22097 case '?':
22098 arm_print_condition (stream);
22099 return;
22100
22101 case '.':
22102 /* The current condition code for a condition code setting instruction.
22103 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22104 fputc('s', stream);
22105 arm_print_condition (stream);
22106 return;
22107
22108 case '!':
22109 /* If the instruction is conditionally executed then print
22110 the current condition code, otherwise print 's'. */
22111 gcc_assert (TARGET_THUMB2);
22112 if (current_insn_predicate)
22113 arm_print_condition (stream);
22114 else
22115 fputc('s', stream);
22116 break;
22117
22118 /* %# is a "break" sequence. It doesn't output anything, but is used to
22119 separate e.g. operand numbers from following text, if that text consists
22120 of further digits which we don't want to be part of the operand
22121 number. */
22122 case '#':
22123 return;
22124
22125 case 'N':
22126 {
22127 REAL_VALUE_TYPE r;
22128 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22129 fprintf (stream, "%s", fp_const_from_val (&r));
22130 }
22131 return;
22132
22133 /* An integer or symbol address without a preceding # sign. */
22134 case 'c':
22135 switch (GET_CODE (x))
22136 {
22137 case CONST_INT:
22138 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22139 break;
22140
22141 case SYMBOL_REF:
22142 output_addr_const (stream, x);
22143 break;
22144
22145 case CONST:
22146 if (GET_CODE (XEXP (x, 0)) == PLUS
22147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22148 {
22149 output_addr_const (stream, x);
22150 break;
22151 }
22152 /* Fall through. */
22153
22154 default:
22155 output_operand_lossage ("Unsupported operand for code '%c'", code);
22156 }
22157 return;
22158
22159 /* An integer that we want to print in HEX. */
22160 case 'x':
22161 switch (GET_CODE (x))
22162 {
22163 case CONST_INT:
22164 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22165 break;
22166
22167 default:
22168 output_operand_lossage ("Unsupported operand for code '%c'", code);
22169 }
22170 return;
22171
22172 case 'B':
22173 if (CONST_INT_P (x))
22174 {
22175 HOST_WIDE_INT val;
22176 val = ARM_SIGN_EXTEND (~INTVAL (x));
22177 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22178 }
22179 else
22180 {
22181 putc ('~', stream);
22182 output_addr_const (stream, x);
22183 }
22184 return;
22185
22186 case 'b':
22187 /* Print the log2 of a CONST_INT. */
22188 {
22189 HOST_WIDE_INT val;
22190
22191 if (!CONST_INT_P (x)
22192 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22193 output_operand_lossage ("Unsupported operand for code '%c'", code);
22194 else
22195 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22196 }
22197 return;
22198
22199 case 'L':
22200 /* The low 16 bits of an immediate constant. */
22201 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22202 return;
22203
22204 case 'i':
22205 fprintf (stream, "%s", arithmetic_instr (x, 1));
22206 return;
22207
22208 case 'I':
22209 fprintf (stream, "%s", arithmetic_instr (x, 0));
22210 return;
22211
22212 case 'S':
22213 {
22214 HOST_WIDE_INT val;
22215 const char *shift;
22216
22217 shift = shift_op (x, &val);
22218
22219 if (shift)
22220 {
22221 fprintf (stream, ", %s ", shift);
22222 if (val == -1)
22223 arm_print_operand (stream, XEXP (x, 1), 0);
22224 else
22225 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22226 }
22227 }
22228 return;
22229
22230 /* An explanation of the 'Q', 'R' and 'H' register operands:
22231
22232 In a pair of registers containing a DI or DF value the 'Q'
22233 operand returns the register number of the register containing
22234 the least significant part of the value. The 'R' operand returns
22235 the register number of the register containing the most
22236 significant part of the value.
22237
22238 The 'H' operand returns the higher of the two register numbers.
22239 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22240 same as the 'Q' operand, since the most significant part of the
22241 value is held in the lower-numbered register. The reverse is true
22242 on systems where WORDS_BIG_ENDIAN is false.
22243
22244 The purpose of these operands is to distinguish between cases
22245 where the endian-ness of the values is important (for example
22246 when they are added together), and cases where the endian-ness
22247 is irrelevant, but the order of register operations is important.
22248 For example when loading a value from memory into a register
22249 pair, the endian-ness does not matter. Provided that the value
22250 from the lower memory address is put into the lower numbered
22251 register, and the value from the higher address is put into the
22252 higher numbered register, the load will work regardless of whether
22253 the value being loaded is big-wordian or little-wordian. The
22254 order of the two register loads can matter however, if the address
22255 of the memory location is actually held in one of the registers
22256 being overwritten by the load.
22257
22258 The 'Q' and 'R' constraints are also available for 64-bit
22259 constants. */
22260 case 'Q':
22261 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22262 {
22263 rtx part = gen_lowpart (SImode, x);
22264 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22265 return;
22266 }
22267
22268 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22269 {
22270 output_operand_lossage ("invalid operand for code '%c'", code);
22271 return;
22272 }
22273
22274 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22275 return;
22276
22277 case 'R':
22278 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22279 {
22280 machine_mode mode = GET_MODE (x);
22281 rtx part;
22282
22283 if (mode == VOIDmode)
22284 mode = DImode;
22285 part = gen_highpart_mode (SImode, mode, x);
22286 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22287 return;
22288 }
22289
22290 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22291 {
22292 output_operand_lossage ("invalid operand for code '%c'", code);
22293 return;
22294 }
22295
22296 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22297 return;
22298
22299 case 'H':
22300 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22301 {
22302 output_operand_lossage ("invalid operand for code '%c'", code);
22303 return;
22304 }
22305
22306 asm_fprintf (stream, "%r", REGNO (x) + 1);
22307 return;
22308
22309 case 'J':
22310 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22311 {
22312 output_operand_lossage ("invalid operand for code '%c'", code);
22313 return;
22314 }
22315
22316 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22317 return;
22318
22319 case 'K':
22320 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22321 {
22322 output_operand_lossage ("invalid operand for code '%c'", code);
22323 return;
22324 }
22325
22326 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22327 return;
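
/* Example for a little-endian DImode value held in {r0, r1} (and, for 'J'
   and 'K', a larger value continuing into r2 and r3): %Q prints r0 (least
   significant word), %R prints r1 (most significant word), %H prints r1,
   %J prints r2 and %K prints r3. With WORDS_BIG_ENDIAN the word selections
   of Q/R and J/K are swapped, while %H is unchanged.  */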
22328
22329 case 'm':
22330 asm_fprintf (stream, "%r",
22331 REG_P (XEXP (x, 0))
22332 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22333 return;
22334
22335 case 'M':
22336 asm_fprintf (stream, "{%r-%r}",
22337 REGNO (x),
22338 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22339 return;
22340
22341 /* Like 'M', but writing doubleword vector registers, for use by Neon
22342 insns. */
22343 case 'h':
22344 {
22345 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22346 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22347 if (numregs == 1)
22348 asm_fprintf (stream, "{d%d}", regno);
22349 else
22350 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22351 }
22352 return;
22353
22354 case 'd':
22355 /* CONST_TRUE_RTX means always -- that's the default. */
22356 if (x == const_true_rtx)
22357 return;
22358
22359 if (!COMPARISON_P (x))
22360 {
22361 output_operand_lossage ("invalid operand for code '%c'", code);
22362 return;
22363 }
22364
22365 fputs (arm_condition_codes[get_arm_condition_code (x)],
22366 stream);
22367 return;
22368
22369 case 'D':
22370 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22371 want to do that. */
22372 if (x == const_true_rtx)
22373 {
22374 output_operand_lossage ("instruction never executed");
22375 return;
22376 }
22377 if (!COMPARISON_P (x))
22378 {
22379 output_operand_lossage ("invalid operand for code '%c'", code);
22380 return;
22381 }
22382
22383 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22384 (get_arm_condition_code (x))],
22385 stream);
22386 return;
22387
22388 case 's':
22389 case 'V':
22390 case 'W':
22391 case 'X':
22392 case 'Y':
22393 case 'Z':
22394 /* Former Maverick support, removed after GCC-4.7. */
22395 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22396 return;
22397
22398 case 'U':
22399 if (!REG_P (x)
22400 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22401 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22402 /* Bad value for wCG register number. */
22403 {
22404 output_operand_lossage ("invalid operand for code '%c'", code);
22405 return;
22406 }
22407
22408 else
22409 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22410 return;
22411
22412 /* Print an iWMMXt control register name. */
22413 case 'w':
22414 if (!CONST_INT_P (x)
22415 || INTVAL (x) < 0
22416 || INTVAL (x) >= 16)
22417 /* Bad value for wC register number. */
22418 {
22419 output_operand_lossage ("invalid operand for code '%c'", code);
22420 return;
22421 }
22422
22423 else
22424 {
22425 static const char * wc_reg_names [16] =
22426 {
22427 "wCID", "wCon", "wCSSF", "wCASF",
22428 "wC4", "wC5", "wC6", "wC7",
22429 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22430 "wC12", "wC13", "wC14", "wC15"
22431 };
22432
22433 fputs (wc_reg_names [INTVAL (x)], stream);
22434 }
22435 return;
22436
22437 /* Print the high single-precision register of a VFP double-precision
22438 register. */
22439 case 'p':
22440 {
22441 machine_mode mode = GET_MODE (x);
22442 int regno;
22443
22444 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22445 {
22446 output_operand_lossage ("invalid operand for code '%c'", code);
22447 return;
22448 }
22449
22450 regno = REGNO (x);
22451 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22452 {
22453 output_operand_lossage ("invalid operand for code '%c'", code);
22454 return;
22455 }
22456
22457 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22458 }
22459 return;
22460
22461 /* Print a VFP/Neon double precision or quad precision register name. */
22462 case 'P':
22463 case 'q':
22464 {
22465 machine_mode mode = GET_MODE (x);
22466 int is_quad = (code == 'q');
22467 int regno;
22468
22469 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22470 {
22471 output_operand_lossage ("invalid operand for code '%c'", code);
22472 return;
22473 }
22474
22475 if (!REG_P (x)
22476 || !IS_VFP_REGNUM (REGNO (x)))
22477 {
22478 output_operand_lossage ("invalid operand for code '%c'", code);
22479 return;
22480 }
22481
22482 regno = REGNO (x);
22483 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22484 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22485 {
22486 output_operand_lossage ("invalid operand for code '%c'", code);
22487 return;
22488 }
22489
22490 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22491 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22492 }
22493 return;
22494
22495 /* These two codes print the low/high doubleword register of a Neon quad
22496 register, respectively. For pair-structure types, can also print
22497 low/high quadword registers. */
22498 case 'e':
22499 case 'f':
22500 {
22501 machine_mode mode = GET_MODE (x);
22502 int regno;
22503
22504 if ((GET_MODE_SIZE (mode) != 16
22505 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22506 {
22507 output_operand_lossage ("invalid operand for code '%c'", code);
22508 return;
22509 }
22510
22511 regno = REGNO (x);
22512 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22513 {
22514 output_operand_lossage ("invalid operand for code '%c'", code);
22515 return;
22516 }
22517
22518 if (GET_MODE_SIZE (mode) == 16)
22519 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22520 + (code == 'f' ? 1 : 0));
22521 else
22522 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22523 + (code == 'f' ? 1 : 0));
22524 }
22525 return;
22526
22527 /* Print a VFPv3 floating-point constant, represented as an integer
22528 index. */
22529 case 'G':
22530 {
22531 int index = vfp3_const_double_index (x);
22532 gcc_assert (index != -1);
22533 fprintf (stream, "%d", index);
22534 }
22535 return;
22536
22537 /* Print bits representing opcode features for Neon.
22538
22539 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22540 and polynomials as unsigned.
22541
22542 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22543
22544 Bit 2 is 1 for rounding functions, 0 otherwise. */
22545
22546 /* Identify the type as 's', 'u', 'p' or 'f'. */
22547 case 'T':
22548 {
22549 HOST_WIDE_INT bits = INTVAL (x);
22550 fputc ("uspf"[bits & 3], stream);
22551 }
22552 return;
22553
22554 /* Likewise, but signed and unsigned integers are both 'i'. */
22555 case 'F':
22556 {
22557 HOST_WIDE_INT bits = INTVAL (x);
22558 fputc ("iipf"[bits & 3], stream);
22559 }
22560 return;
22561
22562 /* As for 'T', but emit 'u' instead of 'p'. */
22563 case 't':
22564 {
22565 HOST_WIDE_INT bits = INTVAL (x);
22566 fputc ("usuf"[bits & 3], stream);
22567 }
22568 return;
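
/* Worked example for the three codes above: a bits value of 2 (unsigned
   polynomial) prints 'p' for %T, 'p' for %F and 'u' for %t; a value of 1
   (signed integer) prints 's', 'i' and 's'; a value of 3 (float) prints
   'f' for all three.  */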
22569
22570 /* Bit 2: rounding (vs none). */
22571 case 'O':
22572 {
22573 HOST_WIDE_INT bits = INTVAL (x);
22574 fputs ((bits & 4) != 0 ? "r" : "", stream);
22575 }
22576 return;
22577
22578 /* Memory operand for vld1/vst1 instruction. */
22579 case 'A':
22580 {
22581 rtx addr;
22582 bool postinc = FALSE;
22583 rtx postinc_reg = NULL;
22584 unsigned align, memsize, align_bits;
22585
22586 gcc_assert (MEM_P (x));
22587 addr = XEXP (x, 0);
22588 if (GET_CODE (addr) == POST_INC)
22589 {
22590 postinc = 1;
22591 addr = XEXP (addr, 0);
22592 }
22593 if (GET_CODE (addr) == POST_MODIFY)
22594 {
22595 postinc_reg = XEXP( XEXP (addr, 1), 1);
22596 addr = XEXP (addr, 0);
22597 }
22598 asm_fprintf (stream, "[%r", REGNO (addr));
22599
22600 /* We know the alignment of this access, so we can emit a hint in the
22601 instruction (for some alignments) as an aid to the memory subsystem
22602 of the target. */
22603 align = MEM_ALIGN (x) >> 3;
22604 memsize = MEM_SIZE (x);
22605
22606 /* Only certain alignment specifiers are supported by the hardware. */
22607 if (memsize == 32 && (align % 32) == 0)
22608 align_bits = 256;
22609 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22610 align_bits = 128;
22611 else if (memsize >= 8 && (align % 8) == 0)
22612 align_bits = 64;
22613 else
22614 align_bits = 0;
22615
22616 if (align_bits != 0)
22617 asm_fprintf (stream, ":%d", align_bits);
22618
22619 asm_fprintf (stream, "]");
22620
22621 if (postinc)
22622 fputs("!", stream);
22623 if (postinc_reg)
22624 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22625 }
22626 return;
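
/* E.g. a 16-byte access through a post-incremented pointer in r0 that is
   known to be 16-byte aligned is printed as "[r0:128]!"; if no alignment
   hint applies, the operand degenerates to plain "[r0]".  */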
22627
22628 case 'C':
22629 {
22630 rtx addr;
22631
22632 gcc_assert (MEM_P (x));
22633 addr = XEXP (x, 0);
22634 gcc_assert (REG_P (addr));
22635 asm_fprintf (stream, "[%r]", REGNO (addr));
22636 }
22637 return;
22638
22639 /* Translate an S register number into a D register number and element index. */
22640 case 'y':
22641 {
22642 machine_mode mode = GET_MODE (x);
22643 int regno;
22644
22645 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22646 {
22647 output_operand_lossage ("invalid operand for code '%c'", code);
22648 return;
22649 }
22650
22651 regno = REGNO (x);
22652 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22653 {
22654 output_operand_lossage ("invalid operand for code '%c'", code);
22655 return;
22656 }
22657
22658 regno = regno - FIRST_VFP_REGNUM;
22659 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22660 }
22661 return;
22662
22663 case 'v':
22664 gcc_assert (CONST_DOUBLE_P (x));
22665 int result;
22666 result = vfp3_const_double_for_fract_bits (x);
22667 if (result == 0)
22668 result = vfp3_const_double_for_bits (x);
22669 fprintf (stream, "#%d", result);
22670 return;
22671
22672 /* Register specifier for vld1.16/vst1.16. Translate the S register
22673 number into a D register number and element index. */
22674 case 'z':
22675 {
22676 machine_mode mode = GET_MODE (x);
22677 int regno;
22678
22679 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22680 {
22681 output_operand_lossage ("invalid operand for code '%c'", code);
22682 return;
22683 }
22684
22685 regno = REGNO (x);
22686 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22687 {
22688 output_operand_lossage ("invalid operand for code '%c'", code);
22689 return;
22690 }
22691
22692 regno = regno - FIRST_VFP_REGNUM;
22693 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22694 }
22695 return;
22696
22697 default:
22698 if (x == 0)
22699 {
22700 output_operand_lossage ("missing operand");
22701 return;
22702 }
22703
22704 switch (GET_CODE (x))
22705 {
22706 case REG:
22707 asm_fprintf (stream, "%r", REGNO (x));
22708 break;
22709
22710 case MEM:
22711 output_address (GET_MODE (x), XEXP (x, 0));
22712 break;
22713
22714 case CONST_DOUBLE:
22715 {
22716 char fpstr[20];
22717 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22718 sizeof (fpstr), 0, 1);
22719 fprintf (stream, "#%s", fpstr);
22720 }
22721 break;
22722
22723 default:
22724 gcc_assert (GET_CODE (x) != NEG);
22725 fputc ('#', stream);
22726 if (GET_CODE (x) == HIGH)
22727 {
22728 fputs (":lower16:", stream);
22729 x = XEXP (x, 0);
22730 }
22731
22732 output_addr_const (stream, x);
22733 break;
22734 }
22735 }
22736 }
22737 \f
22738 /* Target hook for printing a memory address. */
22739 static void
22740 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22741 {
22742 if (TARGET_32BIT)
22743 {
22744 int is_minus = GET_CODE (x) == MINUS;
22745
22746 if (REG_P (x))
22747 asm_fprintf (stream, "[%r]", REGNO (x));
22748 else if (GET_CODE (x) == PLUS || is_minus)
22749 {
22750 rtx base = XEXP (x, 0);
22751 rtx index = XEXP (x, 1);
22752 HOST_WIDE_INT offset = 0;
22753 if (!REG_P (base)
22754 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22755 {
22756 /* Ensure that BASE is a register
22757 (one of them must be). Also ensure that
22758 SP is not used as an index register. */
22759 std::swap (base, index);
22760 }
22761 switch (GET_CODE (index))
22762 {
22763 case CONST_INT:
22764 offset = INTVAL (index);
22765 if (is_minus)
22766 offset = -offset;
22767 asm_fprintf (stream, "[%r, #%wd]",
22768 REGNO (base), offset);
22769 break;
22770
22771 case REG:
22772 asm_fprintf (stream, "[%r, %s%r]",
22773 REGNO (base), is_minus ? "-" : "",
22774 REGNO (index));
22775 break;
22776
22777 case MULT:
22778 case ASHIFTRT:
22779 case LSHIFTRT:
22780 case ASHIFT:
22781 case ROTATERT:
22782 {
22783 asm_fprintf (stream, "[%r, %s%r",
22784 REGNO (base), is_minus ? "-" : "",
22785 REGNO (XEXP (index, 0)));
22786 arm_print_operand (stream, index, 'S');
22787 fputs ("]", stream);
22788 break;
22789 }
22790
22791 default:
22792 gcc_unreachable ();
22793 }
22794 }
22795 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22796 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22797 {
22798 gcc_assert (REG_P (XEXP (x, 0)));
22799
22800 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22801 asm_fprintf (stream, "[%r, #%s%d]!",
22802 REGNO (XEXP (x, 0)),
22803 GET_CODE (x) == PRE_DEC ? "-" : "",
22804 GET_MODE_SIZE (mode));
22805 else
22806 asm_fprintf (stream, "[%r], #%s%d",
22807 REGNO (XEXP (x, 0)),
22808 GET_CODE (x) == POST_DEC ? "-" : "",
22809 GET_MODE_SIZE (mode));
22810 }
22811 else if (GET_CODE (x) == PRE_MODIFY)
22812 {
22813 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22814 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22815 asm_fprintf (stream, "#%wd]!",
22816 INTVAL (XEXP (XEXP (x, 1), 1)));
22817 else
22818 asm_fprintf (stream, "%r]!",
22819 REGNO (XEXP (XEXP (x, 1), 1)));
22820 }
22821 else if (GET_CODE (x) == POST_MODIFY)
22822 {
22823 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22824 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22825 asm_fprintf (stream, "#%wd",
22826 INTVAL (XEXP (XEXP (x, 1), 1)));
22827 else
22828 asm_fprintf (stream, "%r",
22829 REGNO (XEXP (XEXP (x, 1), 1)));
22830 }
22831 else output_addr_const (stream, x);
22832 }
22833 else
22834 {
22835 if (REG_P (x))
22836 asm_fprintf (stream, "[%r]", REGNO (x));
22837 else if (GET_CODE (x) == POST_INC)
22838 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22839 else if (GET_CODE (x) == PLUS)
22840 {
22841 gcc_assert (REG_P (XEXP (x, 0)));
22842 if (CONST_INT_P (XEXP (x, 1)))
22843 asm_fprintf (stream, "[%r, #%wd]",
22844 REGNO (XEXP (x, 0)),
22845 INTVAL (XEXP (x, 1)));
22846 else
22847 asm_fprintf (stream, "[%r, %r]",
22848 REGNO (XEXP (x, 0)),
22849 REGNO (XEXP (x, 1)));
22850 }
22851 else
22852 output_addr_const (stream, x);
22853 }
22854 }
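
/* Typical 32-bit outputs of the hook above: "[r0]", "[r0, #-4]",
   "[r0, r1, lsl #2]" for shifted index addressing, "[r0, #8]!" for the
   pre-indexed writeback forms and "[r0], #8" for the post-indexed forms.  */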
22855 \f
22856 /* Target hook for indicating whether a punctuation character for
22857 TARGET_PRINT_OPERAND is valid. */
22858 static bool
22859 arm_print_operand_punct_valid_p (unsigned char code)
22860 {
22861 return (code == '@' || code == '|' || code == '.'
22862 || code == '(' || code == ')' || code == '#'
22863 || (TARGET_32BIT && (code == '?'))
22864 || (TARGET_THUMB2 && (code == '!'))
22865 || (TARGET_THUMB && (code == '_')));
22866 }
22867 \f
22868 /* Target hook for assembling integer objects. The ARM version needs to
22869 handle word-sized values specially. */
22870 static bool
22871 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22872 {
22873 machine_mode mode;
22874
22875 if (size == UNITS_PER_WORD && aligned_p)
22876 {
22877 fputs ("\t.word\t", asm_out_file);
22878 output_addr_const (asm_out_file, x);
22879
22880 /* Mark symbols as position independent. We only do this in the
22881 .text segment, not in the .data segment. */
22882 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22883 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22884 {
22885 /* See legitimize_pic_address for an explanation of the
22886 TARGET_VXWORKS_RTP check. */
22887 /* References to weak symbols cannot be resolved locally:
22888 they may be overridden by a non-weak definition at link
22889 time. */
22890 if (!arm_pic_data_is_text_relative
22891 || (GET_CODE (x) == SYMBOL_REF
22892 && (!SYMBOL_REF_LOCAL_P (x)
22893 || (SYMBOL_REF_DECL (x)
22894 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22895 fputs ("(GOT)", asm_out_file);
22896 else
22897 fputs ("(GOTOFF)", asm_out_file);
22898 }
22899 fputc ('\n', asm_out_file);
22900 return true;
22901 }
22902
22903 mode = GET_MODE (x);
22904
22905 if (arm_vector_mode_supported_p (mode))
22906 {
22907 int i, units;
22908
22909 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22910
22911 units = CONST_VECTOR_NUNITS (x);
22912 size = GET_MODE_UNIT_SIZE (mode);
22913
22914 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22915 for (i = 0; i < units; i++)
22916 {
22917 rtx elt = CONST_VECTOR_ELT (x, i);
22918 assemble_integer
22919 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22920 }
22921 else
22922 for (i = 0; i < units; i++)
22923 {
22924 rtx elt = CONST_VECTOR_ELT (x, i);
22925 assemble_real
22926 (*CONST_DOUBLE_REAL_VALUE (elt),
22927 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22928 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22929 }
22930
22931 return true;
22932 }
22933
22934 return default_assemble_integer (x, size, aligned_p);
22935 }
22936
22937 static void
22938 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22939 {
22940 section *s;
22941
22942 if (!TARGET_AAPCS_BASED)
22943 {
22944 (is_ctor ?
22945 default_named_section_asm_out_constructor
22946 : default_named_section_asm_out_destructor) (symbol, priority);
22947 return;
22948 }
22949
22950 /* Put these in the .init_array section, using a special relocation. */
22951 if (priority != DEFAULT_INIT_PRIORITY)
22952 {
22953 char buf[18];
22954 sprintf (buf, "%s.%.5u",
22955 is_ctor ? ".init_array" : ".fini_array",
22956 priority);
22957 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22958 }
22959 else if (is_ctor)
22960 s = ctors_section;
22961 else
22962 s = dtors_section;
22963
22964 switch_to_section (s);
22965 assemble_align (POINTER_SIZE);
22966 fputs ("\t.word\t", asm_out_file);
22967 output_addr_const (asm_out_file, symbol);
22968 fputs ("(target1)\n", asm_out_file);
22969 }
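
/* For a constructor with priority 65 this emits a ".init_array.00065"
   section containing a "(target1)" relocated word; the numbered sections
   are sorted by the linker so the entries run in priority order.  */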
22970
22971 /* Add a function to the list of static constructors. */
22972
22973 static void
22974 arm_elf_asm_constructor (rtx symbol, int priority)
22975 {
22976 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22977 }
22978
22979 /* Add a function to the list of static destructors. */
22980
22981 static void
22982 arm_elf_asm_destructor (rtx symbol, int priority)
22983 {
22984 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22985 }
22986 \f
22987 /* A finite state machine takes care of noticing whether or not instructions
22988 can be conditionally executed, and thus decrease execution time and code
22989 size by deleting branch instructions. The fsm is controlled by
22990 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22991
22992 /* The states of the fsm controlling condition codes are:
22993 0: normal, do nothing special
22994 1: make ASM_OUTPUT_OPCODE not output this instruction
22995 2: make ASM_OUTPUT_OPCODE not output this instruction
22996 3: make instructions conditional
22997 4: make instructions conditional
22998
22999 State transitions (state->state by whom under condition):
23000 0 -> 1 final_prescan_insn if the `target' is a label
23001 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23002 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23003 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23004 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23005 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23006 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23007 (the target insn is arm_target_insn).
23008
23009 If the jump clobbers the conditions then we use states 2 and 4.
23010
23011 A similar thing can be done with conditional return insns.
23012
23013 XXX In case the `target' is an unconditional branch, this conditionalising
23014 of the instructions always reduces code size, but not always execution
23015 time. But then, I want to reduce the code size to somewhere near what
23016 /bin/cc produces. */
23017
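/* A simple example of the transformation this fsm enables:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   becomes

	cmp	r0, #0
	addne	r1, r1, #1

   provided the skipped instructions can all be conditionalised and their
   number does not exceed max_insns_skipped.  */
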
23018 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23019 instructions. When a COND_EXEC instruction is seen the subsequent
23020 instructions are scanned so that multiple conditional instructions can be
23021 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23022 specify the length and true/false mask for the IT block. These will be
23023 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23024
23025 /* Returns the index of the ARM condition code string in
23026 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23027 COMPARISON should be an rtx like `(eq (...) (...))'. */
23028
23029 enum arm_cond_code
23030 maybe_get_arm_condition_code (rtx comparison)
23031 {
23032 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23033 enum arm_cond_code code;
23034 enum rtx_code comp_code = GET_CODE (comparison);
23035
23036 if (GET_MODE_CLASS (mode) != MODE_CC)
23037 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23038 XEXP (comparison, 1));
23039
23040 switch (mode)
23041 {
23042 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23043 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23044 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23045 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23046 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23047 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23048 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23049 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23050 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23051 case E_CC_DLTUmode: code = ARM_CC;
23052
23053 dominance:
23054 if (comp_code == EQ)
23055 return ARM_INVERSE_CONDITION_CODE (code);
23056 if (comp_code == NE)
23057 return code;
23058 return ARM_NV;
23059
23060 case E_CC_NOOVmode:
23061 switch (comp_code)
23062 {
23063 case NE: return ARM_NE;
23064 case EQ: return ARM_EQ;
23065 case GE: return ARM_PL;
23066 case LT: return ARM_MI;
23067 default: return ARM_NV;
23068 }
23069
23070 case E_CC_Zmode:
23071 switch (comp_code)
23072 {
23073 case NE: return ARM_NE;
23074 case EQ: return ARM_EQ;
23075 default: return ARM_NV;
23076 }
23077
23078 case E_CC_Nmode:
23079 switch (comp_code)
23080 {
23081 case NE: return ARM_MI;
23082 case EQ: return ARM_PL;
23083 default: return ARM_NV;
23084 }
23085
23086 case E_CCFPEmode:
23087 case E_CCFPmode:
23088 /* We can handle all cases except UNEQ and LTGT. */
23089 switch (comp_code)
23090 {
23091 case GE: return ARM_GE;
23092 case GT: return ARM_GT;
23093 case LE: return ARM_LS;
23094 case LT: return ARM_MI;
23095 case NE: return ARM_NE;
23096 case EQ: return ARM_EQ;
23097 case ORDERED: return ARM_VC;
23098 case UNORDERED: return ARM_VS;
23099 case UNLT: return ARM_LT;
23100 case UNLE: return ARM_LE;
23101 case UNGT: return ARM_HI;
23102 case UNGE: return ARM_PL;
23103 /* UNEQ and LTGT do not have a representation. */
23104 case UNEQ: /* Fall through. */
23105 case LTGT: /* Fall through. */
23106 default: return ARM_NV;
23107 }
23108
23109 case E_CC_SWPmode:
23110 switch (comp_code)
23111 {
23112 case NE: return ARM_NE;
23113 case EQ: return ARM_EQ;
23114 case GE: return ARM_LE;
23115 case GT: return ARM_LT;
23116 case LE: return ARM_GE;
23117 case LT: return ARM_GT;
23118 case GEU: return ARM_LS;
23119 case GTU: return ARM_CC;
23120 case LEU: return ARM_CS;
23121 case LTU: return ARM_HI;
23122 default: return ARM_NV;
23123 }
23124
23125 case E_CC_Cmode:
23126 switch (comp_code)
23127 {
23128 case LTU: return ARM_CS;
23129 case GEU: return ARM_CC;
23130 case NE: return ARM_CS;
23131 case EQ: return ARM_CC;
23132 default: return ARM_NV;
23133 }
23134
23135 case E_CC_CZmode:
23136 switch (comp_code)
23137 {
23138 case NE: return ARM_NE;
23139 case EQ: return ARM_EQ;
23140 case GEU: return ARM_CS;
23141 case GTU: return ARM_HI;
23142 case LEU: return ARM_LS;
23143 case LTU: return ARM_CC;
23144 default: return ARM_NV;
23145 }
23146
23147 case E_CC_NCVmode:
23148 switch (comp_code)
23149 {
23150 case GE: return ARM_GE;
23151 case LT: return ARM_LT;
23152 case GEU: return ARM_CS;
23153 case LTU: return ARM_CC;
23154 default: return ARM_NV;
23155 }
23156
23157 case E_CC_Vmode:
23158 switch (comp_code)
23159 {
23160 case NE: return ARM_VS;
23161 case EQ: return ARM_VC;
23162 default: return ARM_NV;
23163 }
23164
23165 case E_CCmode:
23166 switch (comp_code)
23167 {
23168 case NE: return ARM_NE;
23169 case EQ: return ARM_EQ;
23170 case GE: return ARM_GE;
23171 case GT: return ARM_GT;
23172 case LE: return ARM_LE;
23173 case LT: return ARM_LT;
23174 case GEU: return ARM_CS;
23175 case GTU: return ARM_HI;
23176 case LEU: return ARM_LS;
23177 case LTU: return ARM_CC;
23178 default: return ARM_NV;
23179 }
23180
23181 default: gcc_unreachable ();
23182 }
23183 }
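
/* For example, a (ge (reg:CC_NOOV ...) (const_int 0)) comparison maps to
   ARM_PL rather than ARM_GE, because the overflow flag is not valid in
   CC_NOOVmode; similarly CC_SWPmode returns the swapped condition
   (GT -> LT and so on) because the comparison operands were exchanged.  */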
23184
23185 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23186 static enum arm_cond_code
23187 get_arm_condition_code (rtx comparison)
23188 {
23189 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23190 gcc_assert (code != ARM_NV);
23191 return code;
23192 }
23193
23194 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23195 code registers when not targeting Thumb1. The VFP condition register
23196 only exists when generating hard-float code. */
23197 static bool
23198 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23199 {
23200 if (!TARGET_32BIT)
23201 return false;
23202
23203 *p1 = CC_REGNUM;
23204 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23205 return true;
23206 }
23207
23208 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23209 instructions. */
23210 void
23211 thumb2_final_prescan_insn (rtx_insn *insn)
23212 {
23213 rtx_insn *first_insn = insn;
23214 rtx body = PATTERN (insn);
23215 rtx predicate;
23216 enum arm_cond_code code;
23217 int n;
23218 int mask;
23219 int max;
23220
23221 /* max_insns_skipped in the tune was already taken into account in the
23222 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23223 just emit the IT blocks as large as we can; it does not make sense to split
23224 the IT blocks. */
23225 max = MAX_INSN_PER_IT_BLOCK;
23226
23227 /* Remove the previous insn from the count of insns to be output. */
23228 if (arm_condexec_count)
23229 arm_condexec_count--;
23230
23231 /* Nothing to do if we are already inside a conditional block. */
23232 if (arm_condexec_count)
23233 return;
23234
23235 if (GET_CODE (body) != COND_EXEC)
23236 return;
23237
23238 /* Conditional jumps are implemented directly. */
23239 if (JUMP_P (insn))
23240 return;
23241
23242 predicate = COND_EXEC_TEST (body);
23243 arm_current_cc = get_arm_condition_code (predicate);
23244
23245 n = get_attr_ce_count (insn);
23246 arm_condexec_count = 1;
23247 arm_condexec_mask = (1 << n) - 1;
23248 arm_condexec_masklen = n;
23249 /* See if subsequent instructions can be combined into the same block. */
23250 for (;;)
23251 {
23252 insn = next_nonnote_insn (insn);
23253
23254 /* Jumping into the middle of an IT block is illegal, so a label or
23255 barrier terminates the block. */
23256 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23257 break;
23258
23259 body = PATTERN (insn);
23260 /* USE and CLOBBER aren't really insns, so just skip them. */
23261 if (GET_CODE (body) == USE
23262 || GET_CODE (body) == CLOBBER)
23263 continue;
23264
23265 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23266 if (GET_CODE (body) != COND_EXEC)
23267 break;
23268 /* Maximum number of conditionally executed instructions in a block. */
23269 n = get_attr_ce_count (insn);
23270 if (arm_condexec_masklen + n > max)
23271 break;
23272
23273 predicate = COND_EXEC_TEST (body);
23274 code = get_arm_condition_code (predicate);
23275 mask = (1 << n) - 1;
23276 if (arm_current_cc == code)
23277 arm_condexec_mask |= (mask << arm_condexec_masklen);
23278 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23279 break;
23280
23281 arm_condexec_count++;
23282 arm_condexec_masklen += n;
23283
23284 /* A jump must be the last instruction in a conditional block. */
23285 if (JUMP_P (insn))
23286 break;
23287 }
23288 /* Restore recog_data (getting the attributes of other insns can
23289 destroy this array, but final.c assumes that it remains intact
23290 across this call). */
23291 extract_constrain_insn_cached (first_insn);
23292 }
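
/* Example of the mask bookkeeping above: four single conditional insns with
   conditions EQ, NE, EQ, EQ give arm_condexec_count == 4,
   arm_condexec_masklen == 4 and arm_condexec_mask == 0b1101 (bit 0 is the
   first insn), i.e. a then/else/then/then block for condition EQ.  */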
23293
23294 void
23295 arm_final_prescan_insn (rtx_insn *insn)
23296 {
23297 /* BODY will hold the body of INSN. */
23298 rtx body = PATTERN (insn);
23299
23300 /* This will be 1 if trying to repeat the trick, and things need to be
23301 reversed if it appears to fail. */
23302 int reverse = 0;
23303
23304 /* If we start with a return insn, we only succeed if we find another one. */
23305 int seeking_return = 0;
23306 enum rtx_code return_code = UNKNOWN;
23307
23308 /* START_INSN will hold the insn from where we start looking. This is the
23309 first insn after the following code_label if REVERSE is true. */
23310 rtx_insn *start_insn = insn;
23311
23312 /* If in state 4, check if the target branch is reached, in order to
23313 change back to state 0. */
23314 if (arm_ccfsm_state == 4)
23315 {
23316 if (insn == arm_target_insn)
23317 {
23318 arm_target_insn = NULL;
23319 arm_ccfsm_state = 0;
23320 }
23321 return;
23322 }
23323
23324 /* If in state 3, it is possible to repeat the trick, if this insn is an
23325 unconditional branch to a label, and immediately following this branch
23326 is the previous target label which is only used once, and the label this
23327 branch jumps to is not too far off. */
23328 if (arm_ccfsm_state == 3)
23329 {
23330 if (simplejump_p (insn))
23331 {
23332 start_insn = next_nonnote_insn (start_insn);
23333 if (BARRIER_P (start_insn))
23334 {
23335 /* XXX Isn't this always a barrier? */
23336 start_insn = next_nonnote_insn (start_insn);
23337 }
23338 if (LABEL_P (start_insn)
23339 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23340 && LABEL_NUSES (start_insn) == 1)
23341 reverse = TRUE;
23342 else
23343 return;
23344 }
23345 else if (ANY_RETURN_P (body))
23346 {
23347 start_insn = next_nonnote_insn (start_insn);
23348 if (BARRIER_P (start_insn))
23349 start_insn = next_nonnote_insn (start_insn);
23350 if (LABEL_P (start_insn)
23351 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23352 && LABEL_NUSES (start_insn) == 1)
23353 {
23354 reverse = TRUE;
23355 seeking_return = 1;
23356 return_code = GET_CODE (body);
23357 }
23358 else
23359 return;
23360 }
23361 else
23362 return;
23363 }
23364
23365 gcc_assert (!arm_ccfsm_state || reverse);
23366 if (!JUMP_P (insn))
23367 return;
23368
23369 /* This jump might be paralleled with a clobber of the condition codes;
23370 the jump should always come first. */
23371 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23372 body = XVECEXP (body, 0, 0);
23373
23374 if (reverse
23375 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23376 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23377 {
23378 int insns_skipped;
23379 int fail = FALSE, succeed = FALSE;
23380 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23381 int then_not_else = TRUE;
23382 rtx_insn *this_insn = start_insn;
23383 rtx label = 0;
23384
23385 /* Register the insn jumped to. */
23386 if (reverse)
23387 {
23388 if (!seeking_return)
23389 label = XEXP (SET_SRC (body), 0);
23390 }
23391 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23392 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23393 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23394 {
23395 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23396 then_not_else = FALSE;
23397 }
23398 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23399 {
23400 seeking_return = 1;
23401 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23402 }
23403 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23404 {
23405 seeking_return = 1;
23406 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23407 then_not_else = FALSE;
23408 }
23409 else
23410 gcc_unreachable ();
23411
23412 /* See how many insns this branch skips, and what kind of insns. If all
23413 insns are okay, and the label or unconditional branch to the same
23414 label is not too far away, succeed. */
23415 for (insns_skipped = 0;
23416 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23417 {
23418 rtx scanbody;
23419
23420 this_insn = next_nonnote_insn (this_insn);
23421 if (!this_insn)
23422 break;
23423
23424 switch (GET_CODE (this_insn))
23425 {
23426 case CODE_LABEL:
23427 /* Succeed if it is the target label, otherwise fail since
23428 control falls in from somewhere else. */
23429 if (this_insn == label)
23430 {
23431 arm_ccfsm_state = 1;
23432 succeed = TRUE;
23433 }
23434 else
23435 fail = TRUE;
23436 break;
23437
23438 case BARRIER:
23439 /* Succeed if the following insn is the target label.
23440 Otherwise fail.
23441 If return insns are used then the last insn in a function
23442 will be a barrier. */
23443 this_insn = next_nonnote_insn (this_insn);
23444 if (this_insn && this_insn == label)
23445 {
23446 arm_ccfsm_state = 1;
23447 succeed = TRUE;
23448 }
23449 else
23450 fail = TRUE;
23451 break;
23452
23453 case CALL_INSN:
23454 /* The AAPCS says that conditional calls should not be
23455 used since they make interworking inefficient (the
23456 linker can't transform BL<cond> into BLX). That's
23457 only a problem if the machine has BLX. */
23458 if (arm_arch5)
23459 {
23460 fail = TRUE;
23461 break;
23462 }
23463
23464 /* Succeed if the following insn is the target label, or
23465 if the following two insns are a barrier and the
23466 target label. */
23467 this_insn = next_nonnote_insn (this_insn);
23468 if (this_insn && BARRIER_P (this_insn))
23469 this_insn = next_nonnote_insn (this_insn);
23470
23471 if (this_insn && this_insn == label
23472 && insns_skipped < max_insns_skipped)
23473 {
23474 arm_ccfsm_state = 1;
23475 succeed = TRUE;
23476 }
23477 else
23478 fail = TRUE;
23479 break;
23480
23481 case JUMP_INSN:
23482 /* If this is an unconditional branch to the same label, succeed.
23483 If it is to another label, do nothing. If it is conditional,
23484 fail. */
23485 /* XXX Probably, the tests for SET and the PC are
23486 unnecessary. */
23487
23488 scanbody = PATTERN (this_insn);
23489 if (GET_CODE (scanbody) == SET
23490 && GET_CODE (SET_DEST (scanbody)) == PC)
23491 {
23492 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23493 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23494 {
23495 arm_ccfsm_state = 2;
23496 succeed = TRUE;
23497 }
23498 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23499 fail = TRUE;
23500 }
23501 /* Fail if a conditional return is undesirable (e.g. on a
23502 StrongARM), but still allow this if optimizing for size. */
23503 else if (GET_CODE (scanbody) == return_code
23504 && !use_return_insn (TRUE, NULL)
23505 && !optimize_size)
23506 fail = TRUE;
23507 else if (GET_CODE (scanbody) == return_code)
23508 {
23509 arm_ccfsm_state = 2;
23510 succeed = TRUE;
23511 }
23512 else if (GET_CODE (scanbody) == PARALLEL)
23513 {
23514 switch (get_attr_conds (this_insn))
23515 {
23516 case CONDS_NOCOND:
23517 break;
23518 default:
23519 fail = TRUE;
23520 break;
23521 }
23522 }
23523 else
23524 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23525
23526 break;
23527
23528 case INSN:
23529 /* Instructions using or affecting the condition codes make it
23530 fail. */
23531 scanbody = PATTERN (this_insn);
23532 if (!(GET_CODE (scanbody) == SET
23533 || GET_CODE (scanbody) == PARALLEL)
23534 || get_attr_conds (this_insn) != CONDS_NOCOND)
23535 fail = TRUE;
23536 break;
23537
23538 default:
23539 break;
23540 }
23541 }
23542 if (succeed)
23543 {
23544 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23545 arm_target_label = CODE_LABEL_NUMBER (label);
23546 else
23547 {
23548 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23549
23550 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23551 {
23552 this_insn = next_nonnote_insn (this_insn);
23553 gcc_assert (!this_insn
23554 || (!BARRIER_P (this_insn)
23555 && !LABEL_P (this_insn)));
23556 }
23557 if (!this_insn)
23558 {
23559 /* Oh, dear! We ran off the end; give up. */
23560 extract_constrain_insn_cached (insn);
23561 arm_ccfsm_state = 0;
23562 arm_target_insn = NULL;
23563 return;
23564 }
23565 arm_target_insn = this_insn;
23566 }
23567
23568 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23569 what it was. */
23570 if (!reverse)
23571 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23572
23573 if (reverse || then_not_else)
23574 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23575 }
23576
23577 /* Restore recog_data (getting the attributes of other insns can
23578 destroy this array, but final.c assumes that it remains intact
23579 across this call). */
23580 extract_constrain_insn_cached (insn);
23581 }
23582 }
23583
23584 /* Output IT instructions. */
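/* An illustrative example of the output below (assuming each insn in
   the block has a ce_count of 1): for a three-insn block in which the
   second insn uses the inverse of arm_current_cc == EQ,
   arm_condexec_mask has bits 0 and 2 set, BUFF becomes "tet", and the
   function prints "itet" followed by "eq", i.e. the IT instruction
   that opens the block.  */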
23585 void
23586 thumb2_asm_output_opcode (FILE * stream)
23587 {
23588 char buff[5];
23589 int n;
23590
23591 if (arm_condexec_mask)
23592 {
23593 for (n = 0; n < arm_condexec_masklen; n++)
23594 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23595 buff[n] = 0;
23596 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23597 arm_condition_codes[arm_current_cc]);
23598 arm_condexec_mask = 0;
23599 }
23600 }
23601
23602 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23603 UNITS_PER_WORD bytes wide. */
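/* For example, ARM_NUM_REGS (DImode) == 2, so a DImode value in the
   core registers occupies a register pair, whereas the special
   registers caught by the early return below (e.g. the iWMMXt
   registers) always hold a value in a single register regardless of
   its mode.  */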
23604 static unsigned int
23605 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23606 {
23607 if (TARGET_32BIT
23608 && regno > PC_REGNUM
23609 && regno != FRAME_POINTER_REGNUM
23610 && regno != ARG_POINTER_REGNUM
23611 && !IS_VFP_REGNUM (regno))
23612 return 1;
23613
23614 return ARM_NUM_REGS (mode);
23615 }
23616
23617 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23618 static bool
23619 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23620 {
23621 if (GET_MODE_CLASS (mode) == MODE_CC)
23622 return (regno == CC_REGNUM
23623 || (TARGET_HARD_FLOAT
23624 && regno == VFPCC_REGNUM));
23625
23626 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23627 return false;
23628
23629 if (TARGET_THUMB1)
23630 /* For the Thumb we only allow values bigger than SImode in
23631 registers 0 - 6, so that there is always a second low
23632 register available to hold the upper part of the value.
23633 We probably ought to ensure that the register is the
23634 start of an even-numbered register pair. */
23635 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23636
23637 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23638 {
23639 if (mode == SFmode || mode == SImode)
23640 return VFP_REGNO_OK_FOR_SINGLE (regno);
23641
23642 if (mode == DFmode)
23643 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23644
23645 if (mode == HFmode)
23646 return VFP_REGNO_OK_FOR_SINGLE (regno);
23647
23648 /* VFP registers can hold HImode values. */
23649 if (mode == HImode)
23650 return VFP_REGNO_OK_FOR_SINGLE (regno);
23651
23652 if (TARGET_NEON)
23653 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23654 || (VALID_NEON_QREG_MODE (mode)
23655 && NEON_REGNO_OK_FOR_QUAD (regno))
23656 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23657 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23658 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23659 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23660 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23661
23662 return false;
23663 }
23664
23665 if (TARGET_REALLY_IWMMXT)
23666 {
23667 if (IS_IWMMXT_GR_REGNUM (regno))
23668 return mode == SImode;
23669
23670 if (IS_IWMMXT_REGNUM (regno))
23671 return VALID_IWMMXT_REG_MODE (mode);
23672 }
23673
23674 /* We allow almost any value to be stored in the general registers.
23675 Restrict doubleword quantities to even register pairs in ARM state
23676 so that we can use ldrd. Do not allow very large Neon structure
23677 opaque modes in general registers; they would use too many. */
23678 if (regno <= LAST_ARM_REGNUM)
23679 {
23680 if (ARM_NUM_REGS (mode) > 4)
23681 return false;
23682
23683 if (TARGET_THUMB2)
23684 return true;
23685
23686 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23687 }
23688
23689 if (regno == FRAME_POINTER_REGNUM
23690 || regno == ARG_POINTER_REGNUM)
23691 /* We only allow integers in the fake hard registers. */
23692 return GET_MODE_CLASS (mode) == MODE_INT;
23693
23694 return false;
23695 }
23696
23697 /* Implement TARGET_MODES_TIEABLE_P. */
23698
23699 static bool
23700 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23701 {
23702 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23703 return true;
23704
23705 /* We specifically want to allow elements of "structure" modes to
23706 be tieable to the structure. This more general condition allows
23707 other rarer situations too. */
23708 if (TARGET_NEON
23709 && (VALID_NEON_DREG_MODE (mode1)
23710 || VALID_NEON_QREG_MODE (mode1)
23711 || VALID_NEON_STRUCT_MODE (mode1))
23712 && (VALID_NEON_DREG_MODE (mode2)
23713 || VALID_NEON_QREG_MODE (mode2)
23714 || VALID_NEON_STRUCT_MODE (mode2)))
23715 return true;
23716
23717 return false;
23718 }
23719
23720 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23721 not used in arm mode. */
23722
23723 enum reg_class
23724 arm_regno_class (int regno)
23725 {
23726 if (regno == PC_REGNUM)
23727 return NO_REGS;
23728
23729 if (TARGET_THUMB1)
23730 {
23731 if (regno == STACK_POINTER_REGNUM)
23732 return STACK_REG;
23733 if (regno == CC_REGNUM)
23734 return CC_REG;
23735 if (regno < 8)
23736 return LO_REGS;
23737 return HI_REGS;
23738 }
23739
23740 if (TARGET_THUMB2 && regno < 8)
23741 return LO_REGS;
23742
23743 if ( regno <= LAST_ARM_REGNUM
23744 || regno == FRAME_POINTER_REGNUM
23745 || regno == ARG_POINTER_REGNUM)
23746 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23747
23748 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23749 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23750
23751 if (IS_VFP_REGNUM (regno))
23752 {
23753 if (regno <= D7_VFP_REGNUM)
23754 return VFP_D0_D7_REGS;
23755 else if (regno <= LAST_LO_VFP_REGNUM)
23756 return VFP_LO_REGS;
23757 else
23758 return VFP_HI_REGS;
23759 }
23760
23761 if (IS_IWMMXT_REGNUM (regno))
23762 return IWMMXT_REGS;
23763
23764 if (IS_IWMMXT_GR_REGNUM (regno))
23765 return IWMMXT_GR_REGS;
23766
23767 return NO_REGS;
23768 }
23769
23770 /* Handle a special case when computing the offset
23771 of an argument from the frame pointer. */
23772 int
23773 arm_debugger_arg_offset (int value, rtx addr)
23774 {
23775 rtx_insn *insn;
23776
23777 /* We are only interested if dbxout_parms() failed to compute the offset. */
23778 if (value != 0)
23779 return 0;
23780
23781 /* We can only cope with the case where the address is held in a register. */
23782 if (!REG_P (addr))
23783 return 0;
23784
23785 /* If we are using the frame pointer to point at the argument, then
23786 an offset of 0 is correct. */
23787 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23788 return 0;
23789
23790 /* If we are using the stack pointer to point at the
23791 argument, then an offset of 0 is correct. */
23792 /* ??? Check this is consistent with thumb2 frame layout. */
23793 if ((TARGET_THUMB || !frame_pointer_needed)
23794 && REGNO (addr) == SP_REGNUM)
23795 return 0;
23796
23797 /* Oh dear. The argument is pointed to by a register rather
23798 than being held in a register, or being stored at a known
23799 offset from the frame pointer. Since GDB only understands
23800 those two kinds of argument we must translate the address
23801 held in the register into an offset from the frame pointer.
23802 We do this by searching through the insns for the function
23803 looking to see where this register gets its value. If the
23804 register is initialized from the frame pointer plus an offset
23805 then we are in luck and we can continue, otherwise we give up.
23806
23807 This code is exercised by producing debugging information
23808 for a function with arguments like this:
23809
23810 double func (double a, double b, int c, double d) {return d;}
23811
23812 Without this code the stab for parameter 'd' will be set to
23813 an offset of 0 from the frame pointer, rather than 8. */
23814
23815 /* The if() statement says:
23816
23817 If the insn is a normal instruction
23818 and if the insn is setting the value in a register
23819 and if the register being set is the register holding the address of the argument
23820 and if the address is computed by an addition
23821 that involves adding a constant integer
23822 to a register
23823 which is the frame pointer
23824
23825 then... */
23826
23827 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23828 {
23829 if ( NONJUMP_INSN_P (insn)
23830 && GET_CODE (PATTERN (insn)) == SET
23831 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23832 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23833 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23834 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23835 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23836 )
23837 {
23838 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23839
23840 break;
23841 }
23842 }
23843
23844 if (value == 0)
23845 {
23846 debug_rtx (addr);
23847 warning (0, "unable to compute real location of stacked parameter");
23848 value = 8; /* XXX magic hack */
23849 }
23850
23851 return value;
23852 }
23853 \f
23854 /* Implement TARGET_PROMOTED_TYPE. */
23855
23856 static tree
23857 arm_promoted_type (const_tree t)
23858 {
23859 if (SCALAR_FLOAT_TYPE_P (t)
23860 && TYPE_PRECISION (t) == 16
23861 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23862 return float_type_node;
23863 return NULL_TREE;
23864 }
23865
23866 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23867 This simply adds HFmode as a supported mode; even though we don't
23868 implement arithmetic on this type directly, it's supported by
23869 optabs conversions, much the way the double-word arithmetic is
23870 special-cased in the default hook. */
23871
23872 static bool
23873 arm_scalar_mode_supported_p (scalar_mode mode)
23874 {
23875 if (mode == HFmode)
23876 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23877 else if (ALL_FIXED_POINT_MODE_P (mode))
23878 return true;
23879 else
23880 return default_scalar_mode_supported_p (mode);
23881 }
23882
23883 /* Set the value of FLT_EVAL_METHOD.
23884 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23885
23886 0: evaluate all operations and constants, whose semantic type has at
23887 most the range and precision of type float, to the range and
23888 precision of float; evaluate all other operations and constants to
23889 the range and precision of the semantic type;
23890
23891 N, where _FloatN is a supported interchange floating type:
23892 evaluate all operations and constants, whose semantic type has at
23893 most the range and precision of _FloatN type, to the range and
23894 precision of the _FloatN type; evaluate all other operations and
23895 constants to the range and precision of the semantic type;
23896
23897 If we have the ARMv8.2-A extensions then we support _Float16 in native
23898 precision, so we should set this to 16. Otherwise, we support the type,
23899 but want to evaluate expressions in float precision, so set this to
23900 0. */
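/* Concretely (an illustrative example rather than an exhaustive rule):
   with the ARMv8.2-A FP16 extension enabled (e.g. via
   -march=armv8.2-a+fp16), an expression such as a * b + c on _Float16
   operands can be evaluated directly in half precision
   (FLT_EVAL_METHOD == 16); without it, the operands are evaluated to
   float precision first (FLT_EVAL_METHOD == 0).  */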
23901
23902 static enum flt_eval_method
23903 arm_excess_precision (enum excess_precision_type type)
23904 {
23905 switch (type)
23906 {
23907 case EXCESS_PRECISION_TYPE_FAST:
23908 case EXCESS_PRECISION_TYPE_STANDARD:
23909 /* We can calculate either in 16-bit range and precision or
23910 32-bit range and precision. Make that decision based on whether
23911 we have native support for the ARMv8.2-A 16-bit floating-point
23912 instructions or not. */
23913 return (TARGET_VFP_FP16INST
23914 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23915 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23916 case EXCESS_PRECISION_TYPE_IMPLICIT:
23917 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23918 default:
23919 gcc_unreachable ();
23920 }
23921 return FLT_EVAL_METHOD_UNPREDICTABLE;
23922 }
23923
23924
23925 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23926 _Float16 if we are using anything other than ieee format for 16-bit
23927 floating point. Otherwise, punt to the default implementation. */
23928 static opt_scalar_float_mode
23929 arm_floatn_mode (int n, bool extended)
23930 {
23931 if (!extended && n == 16)
23932 {
23933 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23934 return HFmode;
23935 return opt_scalar_float_mode ();
23936 }
23937
23938 return default_floatn_mode (n, extended);
23939 }
23940
23941
23942 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23943 not to early-clobber SRC registers in the process.
23944
23945 We assume that the operands described by SRC and DEST represent a
23946 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23947 number of components into which the copy has been decomposed. */
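/* For example, copying {d0, d1, d2} to {d1, d2, d3} must be emitted as
   d2->d3, d1->d2, d0->d1, i.e. in reverse order, so that no source
   register is overwritten before it has been read; the REGNO
   comparison below arranges exactly that.  */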
23948 void
23949 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23950 {
23951 unsigned int i;
23952
23953 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23954 || REGNO (operands[0]) < REGNO (operands[1]))
23955 {
23956 for (i = 0; i < count; i++)
23957 {
23958 operands[2 * i] = dest[i];
23959 operands[2 * i + 1] = src[i];
23960 }
23961 }
23962 else
23963 {
23964 for (i = 0; i < count; i++)
23965 {
23966 operands[2 * i] = dest[count - i - 1];
23967 operands[2 * i + 1] = src[count - i - 1];
23968 }
23969 }
23970 }
23971
23972 /* Split operands into moves from op[1] + op[2] into op[0]. */
23973
23974 void
23975 neon_split_vcombine (rtx operands[3])
23976 {
23977 unsigned int dest = REGNO (operands[0]);
23978 unsigned int src1 = REGNO (operands[1]);
23979 unsigned int src2 = REGNO (operands[2]);
23980 machine_mode halfmode = GET_MODE (operands[1]);
23981 unsigned int halfregs = REG_NREGS (operands[1]);
23982 rtx destlo, desthi;
23983
23984 if (src1 == dest && src2 == dest + halfregs)
23985 {
23986 /* No-op move. Can't split to nothing; emit something. */
23987 emit_note (NOTE_INSN_DELETED);
23988 return;
23989 }
23990
23991 /* Preserve register attributes for variable tracking. */
23992 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23993 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23994 GET_MODE_SIZE (halfmode));
23995
23996 /* Special case of reversed high/low parts. Use VSWP. */
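/* For example (illustrative): combining d1 and d0, in that order, into
   q0 (which overlaps d0/d1) hits this case; the two SETs are emitted
   as one parallel, which can then be matched as a single VSWP.  */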
23997 if (src2 == dest && src1 == dest + halfregs)
23998 {
23999 rtx x = gen_rtx_SET (destlo, operands[1]);
24000 rtx y = gen_rtx_SET (desthi, operands[2]);
24001 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24002 return;
24003 }
24004
24005 if (!reg_overlap_mentioned_p (operands[2], destlo))
24006 {
24007 /* Try to avoid unnecessary moves if part of the result
24008 is in the right place already. */
24009 if (src1 != dest)
24010 emit_move_insn (destlo, operands[1]);
24011 if (src2 != dest + halfregs)
24012 emit_move_insn (desthi, operands[2]);
24013 }
24014 else
24015 {
24016 if (src2 != dest + halfregs)
24017 emit_move_insn (desthi, operands[2]);
24018 if (src1 != dest)
24019 emit_move_insn (destlo, operands[1]);
24020 }
24021 }
24022 \f
24023 /* Return the number (counting from 0) of
24024 the least significant set bit in MASK. */
24025
24026 inline static int
24027 number_of_first_bit_set (unsigned mask)
24028 {
24029 return ctz_hwi (mask);
24030 }
24031
24032 /* Like emit_multi_reg_push, but allowing for a different set of
24033 registers to be described as saved. MASK is the set of registers
24034 to be saved; REAL_REGS is the set of registers to be described as
24035 saved. If REAL_REGS is 0, only describe the stack adjustment. */
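/* A rough sketch of the RTL generated for MASK covering {r4, r5}:

     (parallel
       [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) (const_int -8))))
	     (unspec:BLK [(reg:SI 4)] UNSPEC_PUSH_MULT))
	(use (reg:SI 5))])

   together with a REG_FRAME_RELATED_EXPR note describing the stack
   adjustment and the individual stores of REAL_REGS for the unwinder.  */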
24036
24037 static rtx_insn *
24038 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24039 {
24040 unsigned long regno;
24041 rtx par[10], tmp, reg;
24042 rtx_insn *insn;
24043 int i, j;
24044
24045 /* Build the parallel of the registers actually being stored. */
24046 for (i = 0; mask; ++i, mask &= mask - 1)
24047 {
24048 regno = ctz_hwi (mask);
24049 reg = gen_rtx_REG (SImode, regno);
24050
24051 if (i == 0)
24052 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24053 else
24054 tmp = gen_rtx_USE (VOIDmode, reg);
24055
24056 par[i] = tmp;
24057 }
24058
24059 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24060 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24061 tmp = gen_frame_mem (BLKmode, tmp);
24062 tmp = gen_rtx_SET (tmp, par[0]);
24063 par[0] = tmp;
24064
24065 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24066 insn = emit_insn (tmp);
24067
24068 /* Always build the stack adjustment note for unwind info. */
24069 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24070 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24071 par[0] = tmp;
24072
24073 /* Build the parallel of the registers recorded as saved for unwind. */
24074 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24075 {
24076 regno = ctz_hwi (real_regs);
24077 reg = gen_rtx_REG (SImode, regno);
24078
24079 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24080 tmp = gen_frame_mem (SImode, tmp);
24081 tmp = gen_rtx_SET (tmp, reg);
24082 RTX_FRAME_RELATED_P (tmp) = 1;
24083 par[j + 1] = tmp;
24084 }
24085
24086 if (j == 0)
24087 tmp = par[0];
24088 else
24089 {
24090 RTX_FRAME_RELATED_P (par[0]) = 1;
24091 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24092 }
24093
24094 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24095
24096 return insn;
24097 }
24098
24099 /* Emit code to pop registers from the stack. F is the
24100 assembly file. MASK is the registers to pop. */
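/* For example, MASK == 0x8011 (r0, r4 and the PC) normally emits
   "pop {r0, r4, pc}"; when interworking, backtracing, EH return or a
   CMSE entry function is involved, the PC is instead handled via
   thumb_exit, as the code below arranges.  */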
24101 static void
24102 thumb_pop (FILE *f, unsigned long mask)
24103 {
24104 int regno;
24105 int lo_mask = mask & 0xFF;
24106
24107 gcc_assert (mask);
24108
24109 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24110 {
24111 /* Special case. Do not generate a POP PC statement here; do it in
24112 thumb_exit (). */
24113 thumb_exit (f, -1);
24114 return;
24115 }
24116
24117 fprintf (f, "\tpop\t{");
24118
24119 /* Look at the low registers first. */
24120 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24121 {
24122 if (lo_mask & 1)
24123 {
24124 asm_fprintf (f, "%r", regno);
24125
24126 if ((lo_mask & ~1) != 0)
24127 fprintf (f, ", ");
24128 }
24129 }
24130
24131 if (mask & (1 << PC_REGNUM))
24132 {
24133 /* Catch popping the PC. */
24134 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24135 || IS_CMSE_ENTRY (arm_current_func_type ()))
24136 {
24137 /* The PC is never popped directly; instead
24138 it is popped into r3 and then BX is used. */
24139 fprintf (f, "}\n");
24140
24141 thumb_exit (f, -1);
24142
24143 return;
24144 }
24145 else
24146 {
24147 if (mask & 0xFF)
24148 fprintf (f, ", ");
24149
24150 asm_fprintf (f, "%r", PC_REGNUM);
24151 }
24152 }
24153
24154 fprintf (f, "}\n");
24155 }
24156
24157 /* Generate code to return from a thumb function.
24158 If 'reg_containing_return_addr' is -1, then the return address is
24159 actually on the stack, at the stack pointer.
24160
24161 Note: do not forget to update length attribute of corresponding insn pattern
24162 when changing assembly output (e.g. length attribute of epilogue_insns when
24163 updating Armv8-M Baseline Security Extensions register clearing
24164 sequences). */
24165 static void
24166 thumb_exit (FILE *f, int reg_containing_return_addr)
24167 {
24168 unsigned regs_available_for_popping;
24169 unsigned regs_to_pop;
24170 int pops_needed;
24171 unsigned available;
24172 unsigned required;
24173 machine_mode mode;
24174 int size;
24175 int restore_a4 = FALSE;
24176
24177 /* Compute the registers we need to pop. */
24178 regs_to_pop = 0;
24179 pops_needed = 0;
24180
24181 if (reg_containing_return_addr == -1)
24182 {
24183 regs_to_pop |= 1 << LR_REGNUM;
24184 ++pops_needed;
24185 }
24186
24187 if (TARGET_BACKTRACE)
24188 {
24189 /* Restore the (ARM) frame pointer and stack pointer. */
24190 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24191 pops_needed += 2;
24192 }
24193
24194 /* If there is nothing to pop then just emit the BX instruction and
24195 return. */
24196 if (pops_needed == 0)
24197 {
24198 if (crtl->calls_eh_return)
24199 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24200
24201 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24202 {
24203 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24204 reg_containing_return_addr);
24205 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24206 }
24207 else
24208 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24209 return;
24210 }
24211 /* Otherwise if we are not supporting interworking and we have not created
24212 a backtrace structure and the function was not entered in ARM mode then
24213 just pop the return address straight into the PC. */
24214 else if (!TARGET_INTERWORK
24215 && !TARGET_BACKTRACE
24216 && !is_called_in_ARM_mode (current_function_decl)
24217 && !crtl->calls_eh_return
24218 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24219 {
24220 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24221 return;
24222 }
24223
24224 /* Find out how many of the (return) argument registers we can corrupt. */
24225 regs_available_for_popping = 0;
24226
24227 /* If returning via __builtin_eh_return, the bottom three registers
24228 all contain information needed for the return. */
24229 if (crtl->calls_eh_return)
24230 size = 12;
24231 else
24232 {
24233 /* Deduce the registers used from the function's
24234 return value. This is more reliable than examining
24235 df_regs_ever_live_p () because that will be set if the register is
24236 ever used in the function, not just if the register is used
24237 to hold a return value. */
24238
24239 if (crtl->return_rtx != 0)
24240 mode = GET_MODE (crtl->return_rtx);
24241 else
24242 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24243
24244 size = GET_MODE_SIZE (mode);
24245
24246 if (size == 0)
24247 {
24248 /* In a void function we can use any argument register.
24249 In a function that returns a structure on the stack
24250 we can use the second and third argument registers. */
24251 if (mode == VOIDmode)
24252 regs_available_for_popping =
24253 (1 << ARG_REGISTER (1))
24254 | (1 << ARG_REGISTER (2))
24255 | (1 << ARG_REGISTER (3));
24256 else
24257 regs_available_for_popping =
24258 (1 << ARG_REGISTER (2))
24259 | (1 << ARG_REGISTER (3));
24260 }
24261 else if (size <= 4)
24262 regs_available_for_popping =
24263 (1 << ARG_REGISTER (2))
24264 | (1 << ARG_REGISTER (3));
24265 else if (size <= 8)
24266 regs_available_for_popping =
24267 (1 << ARG_REGISTER (3));
24268 }
24269
24270 /* Match registers to be popped with registers into which we pop them. */
24271 for (available = regs_available_for_popping,
24272 required = regs_to_pop;
24273 required != 0 && available != 0;
24274 available &= ~(available & - available),
24275 required &= ~(required & - required))
24276 -- pops_needed;
24277
24278 /* If we have any popping registers left over, remove them. */
24279 if (available > 0)
24280 regs_available_for_popping &= ~available;
24281
24282 /* Otherwise if we need another popping register we can use
24283 the fourth argument register. */
24284 else if (pops_needed)
24285 {
24286 /* If we have not found any free argument registers and
24287 reg a4 contains the return address, we must move it. */
24288 if (regs_available_for_popping == 0
24289 && reg_containing_return_addr == LAST_ARG_REGNUM)
24290 {
24291 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24292 reg_containing_return_addr = LR_REGNUM;
24293 }
24294 else if (size > 12)
24295 {
24296 /* Register a4 is being used to hold part of the return value,
24297 but we have dire need of a free, low register. */
24298 restore_a4 = TRUE;
24299
24300 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24301 }
24302
24303 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24304 {
24305 /* The fourth argument register is available. */
24306 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24307
24308 --pops_needed;
24309 }
24310 }
24311
24312 /* Pop as many registers as we can. */
24313 thumb_pop (f, regs_available_for_popping);
24314
24315 /* Process the registers we popped. */
24316 if (reg_containing_return_addr == -1)
24317 {
24318 /* The return address was popped into the lowest numbered register. */
24319 regs_to_pop &= ~(1 << LR_REGNUM);
24320
24321 reg_containing_return_addr =
24322 number_of_first_bit_set (regs_available_for_popping);
24323
24324 /* Remove this register from the mask of available registers, so that
24325 the return address will not be corrupted by further pops. */
24326 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24327 }
24328
24329 /* If we popped other registers then handle them here. */
24330 if (regs_available_for_popping)
24331 {
24332 int frame_pointer;
24333
24334 /* Work out which register currently contains the frame pointer. */
24335 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24336
24337 /* Move it into the correct place. */
24338 asm_fprintf (f, "\tmov\t%r, %r\n",
24339 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24340
24341 /* (Temporarily) remove it from the mask of popped registers. */
24342 regs_available_for_popping &= ~(1 << frame_pointer);
24343 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24344
24345 if (regs_available_for_popping)
24346 {
24347 int stack_pointer;
24348
24349 /* We popped the stack pointer as well,
24350 find the register that contains it. */
24351 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24352
24353 /* Move it into the stack register. */
24354 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24355
24356 /* At this point we have popped all necessary registers, so
24357 do not worry about restoring regs_available_for_popping
24358 to its correct value:
24359
24360 assert (pops_needed == 0)
24361 assert (regs_available_for_popping == (1 << frame_pointer))
24362 assert (regs_to_pop == (1 << STACK_POINTER)) */
24363 }
24364 else
24365 {
24366 /* Since we have just moved the popped value into the frame
24367 pointer, the popping register is available for reuse, and
24368 we know that we still have the stack pointer left to pop. */
24369 regs_available_for_popping |= (1 << frame_pointer);
24370 }
24371 }
24372
24373 /* If we still have registers left on the stack, but we no longer have
24374 any registers into which we can pop them, then we must move the return
24375 address into the link register and make available the register that
24376 contained it. */
24377 if (regs_available_for_popping == 0 && pops_needed > 0)
24378 {
24379 regs_available_for_popping |= 1 << reg_containing_return_addr;
24380
24381 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24382 reg_containing_return_addr);
24383
24384 reg_containing_return_addr = LR_REGNUM;
24385 }
24386
24387 /* If we have registers left on the stack then pop some more.
24388 We know that at most we will want to pop FP and SP. */
24389 if (pops_needed > 0)
24390 {
24391 int popped_into;
24392 int move_to;
24393
24394 thumb_pop (f, regs_available_for_popping);
24395
24396 /* We have popped either FP or SP.
24397 Move whichever one it is into the correct register. */
24398 popped_into = number_of_first_bit_set (regs_available_for_popping);
24399 move_to = number_of_first_bit_set (regs_to_pop);
24400
24401 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24402 --pops_needed;
24403 }
24404
24405 /* If we still have not popped everything then we must have only
24406 had one register available to us and we are now popping the SP. */
24407 if (pops_needed > 0)
24408 {
24409 int popped_into;
24410
24411 thumb_pop (f, regs_available_for_popping);
24412
24413 popped_into = number_of_first_bit_set (regs_available_for_popping);
24414
24415 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24416 /*
24417 assert (regs_to_pop == (1 << STACK_POINTER))
24418 assert (pops_needed == 1)
24419 */
24420 }
24421
24422 /* If necessary restore the a4 register. */
24423 if (restore_a4)
24424 {
24425 if (reg_containing_return_addr != LR_REGNUM)
24426 {
24427 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24428 reg_containing_return_addr = LR_REGNUM;
24429 }
24430
24431 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24432 }
24433
24434 if (crtl->calls_eh_return)
24435 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24436
24437 /* Return to caller. */
24438 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24439 {
24440 /* This is for the cases where LR is not being used to contain the return
24441 address. It may therefore contain information that we might not want
24442 to leak, hence it must be cleared. The value in R0 will never be a
24443 secret at this point, so it is safe to use it, see the clearing code
24444 in 'cmse_nonsecure_entry_clear_before_return'. */
24445 if (reg_containing_return_addr != LR_REGNUM)
24446 asm_fprintf (f, "\tmov\tlr, r0\n");
24447
24448 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24449 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24450 }
24451 else
24452 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24453 }
24454 \f
24455 /* Scan INSN just before assembler is output for it.
24456 For Thumb-1, we track the status of the condition codes; this
24457 information is used in the cbranchsi4_insn pattern. */
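/* A sketch of the condition-code tracking done below: after an insn
   such as "adds r3, r3, r2" (whose conds attribute is SET), the flags
   describe r3 compared with zero, so thumb1_cc_op0/op1/mode are
   recorded as (r3, 0, CC_NOOVmode); a later cbranchsi4_insn testing r3
   against zero can then reuse the flags instead of emitting a fresh
   compare.  */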
24458 void
24459 thumb1_final_prescan_insn (rtx_insn *insn)
24460 {
24461 if (flag_print_asm_name)
24462 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24463 INSN_ADDRESSES (INSN_UID (insn)));
24464 /* Don't overwrite the previous setter when we get to a cbranch. */
24465 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24466 {
24467 enum attr_conds conds;
24468
24469 if (cfun->machine->thumb1_cc_insn)
24470 {
24471 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24472 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24473 CC_STATUS_INIT;
24474 }
24475 conds = get_attr_conds (insn);
24476 if (conds == CONDS_SET)
24477 {
24478 rtx set = single_set (insn);
24479 cfun->machine->thumb1_cc_insn = insn;
24480 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24481 cfun->machine->thumb1_cc_op1 = const0_rtx;
24482 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24483 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24484 {
24485 rtx src1 = XEXP (SET_SRC (set), 1);
24486 if (src1 == const0_rtx)
24487 cfun->machine->thumb1_cc_mode = CCmode;
24488 }
24489 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24490 {
24491 /* Record the src register operand instead of dest because
24492 cprop_hardreg pass propagates src. */
24493 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24494 }
24495 }
24496 else if (conds != CONDS_NOCOND)
24497 cfun->machine->thumb1_cc_insn = NULL_RTX;
24498 }
24499
24500 /* Check if an unexpected far jump is used. */
24501 if (cfun->machine->lr_save_eliminated
24502 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24503 internal_error ("unexpected thumb1 far jump");
24504 }
24505
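/* Return nonzero if VAL, viewed as a 32-bit value, is an 8-bit
   constant shifted left by 0 to 24 bits, i.e. all of its set bits fit
   within 8 contiguous bit positions.  For example 0x1FE (0xFF << 1)
   and 0xFF0000 qualify, while 0x101 does not; such constants can be
   synthesised with a move of the 8-bit value followed by a left
   shift.  */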
24506 int
24507 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24508 {
24509 unsigned HOST_WIDE_INT mask = 0xff;
24510 int i;
24511
24512 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24513 if (val == 0) /* XXX */
24514 return 0;
24515
24516 for (i = 0; i < 25; i++)
24517 if ((val & (mask << i)) == val)
24518 return 1;
24519
24520 return 0;
24521 }
24522
24523 /* Returns nonzero if the current function contains,
24524 or might contain, a far jump. */
24525 static int
24526 thumb_far_jump_used_p (void)
24527 {
24528 rtx_insn *insn;
24529 bool far_jump = false;
24530 unsigned int func_size = 0;
24531
24532 /* If we have already decided that far jumps may be used,
24533 do not bother checking again, and always return true even if
24534 it turns out that they are not being used. Once we have made
24535 the decision that far jumps are present (and that hence the link
24536 register will be pushed onto the stack) we cannot go back on it. */
24537 if (cfun->machine->far_jump_used)
24538 return 1;
24539
24540 /* If this function is not being called from the prologue/epilogue
24541 generation code then it must be being called from the
24542 INITIAL_ELIMINATION_OFFSET macro. */
24543 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24544 {
24545 /* In this case we know that we are being asked about the elimination
24546 of the arg pointer register. If that register is not being used,
24547 then there are no arguments on the stack, and we do not have to
24548 worry that a far jump might force the prologue to push the link
24549 register, changing the stack offsets. In this case we can just
24550 return false, since the presence of far jumps in the function will
24551 not affect stack offsets.
24552
24553 If the arg pointer is live (or if it was live, but has now been
24554 eliminated and so set to dead) then we do have to test to see if
24555 the function might contain a far jump. This test can lead to some
24556 false negatives, since before reload is completed the length of
24557 branch instructions is not known, so gcc defaults to returning their
24558 longest length, which in turn sets the far jump attribute to true.
24559
24560 A false negative will not result in bad code being generated, but it
24561 will result in a needless push and pop of the link register. We
24562 hope that this does not occur too often.
24563
24564 If we need doubleword stack alignment this could affect the other
24565 elimination offsets so we can't risk getting it wrong. */
24566 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24567 cfun->machine->arg_pointer_live = 1;
24568 else if (!cfun->machine->arg_pointer_live)
24569 return 0;
24570 }
24571
24572 /* We should not change far_jump_used during or after reload, as there is
24573 no chance to change stack frame layout. */
24574 if (reload_in_progress || reload_completed)
24575 return 0;
24576
24577 /* Check to see if the function contains a branch
24578 insn with the far jump attribute set. */
24579 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24580 {
24581 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24582 {
24583 far_jump = true;
24584 }
24585 func_size += get_attr_length (insn);
24586 }
24587
24588 /* The far_jump attribute will always be true for thumb1 before the
24589 shorten_branch pass, so checking that attribute before
24590 shorten_branch isn't very useful.
24591
24592 The following heuristic tries to estimate more accurately whether a far
24593 jump will actually be used. The heuristic is very conservative, as there
24594 is no chance to roll back a decision not to use far jumps.
24595
24596 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24597 that each 2-byte insn is associated with a 4-byte constant pool entry.
24598 Using a function size of 2048/3 as the threshold is conservative enough. */
24599 if (far_jump)
24600 {
24601 if ((func_size * 3) >= 2048)
24602 {
24603 /* Record the fact that we have decided that
24604 the function does use far jumps. */
24605 cfun->machine->far_jump_used = 1;
24606 return 1;
24607 }
24608 }
24609
24610 return 0;
24611 }
24612
24613 /* Return nonzero if FUNC must be entered in ARM mode. */
24614 static bool
24615 is_called_in_ARM_mode (tree func)
24616 {
24617 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24618
24619 /* Ignore the problem about functions whose address is taken. */
24620 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24621 return true;
24622
24623 #ifdef ARM_PE
24624 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24625 #else
24626 return false;
24627 #endif
24628 }
24629
24630 /* Given the stack offsets and register mask in OFFSETS, decide how
24631 many additional registers to push instead of subtracting a constant
24632 from SP. For epilogues the principle is the same except we use pop.
24633 FOR_PROLOGUE indicates which we're generating. */
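/* A concrete example of the win this is looking for: the Thumb-1
   "sub sp, #imm" encoding only reaches 508 (a 7-bit value scaled by 4),
   so a 512-byte frame would otherwise need the adjustment loaded into a
   register first.  Pushing one extra (otherwise free) register lowers
   SP by 4 as a side effect, leaving 508 bytes that a single SUB can
   handle; the (amount - 508) / 4 computation below covers this case.  */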
24634 static int
24635 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24636 {
24637 HOST_WIDE_INT amount;
24638 unsigned long live_regs_mask = offsets->saved_regs_mask;
24639 /* Extract a mask of the ones we can give to the Thumb's push/pop
24640 instruction. */
24641 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24642 /* Then count how many other high registers will need to be pushed. */
24643 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24644 int n_free, reg_base, size;
24645
24646 if (!for_prologue && frame_pointer_needed)
24647 amount = offsets->locals_base - offsets->saved_regs;
24648 else
24649 amount = offsets->outgoing_args - offsets->saved_regs;
24650
24651 /* If the stack frame size is 512 exactly, we can save one load
24652 instruction, which should make this a win even when optimizing
24653 for speed. */
24654 if (!optimize_size && amount != 512)
24655 return 0;
24656
24657 /* Can't do this if there are high registers to push. */
24658 if (high_regs_pushed != 0)
24659 return 0;
24660
24661 /* Shouldn't do it in the prologue if no registers would normally
24662 be pushed at all. In the epilogue, also allow it if we'll have
24663 a pop insn for the PC. */
24664 if (l_mask == 0
24665 && (for_prologue
24666 || TARGET_BACKTRACE
24667 || (live_regs_mask & 1 << LR_REGNUM) == 0
24668 || TARGET_INTERWORK
24669 || crtl->args.pretend_args_size != 0))
24670 return 0;
24671
24672 /* Don't do this if thumb_expand_prologue wants to emit instructions
24673 between the push and the stack frame allocation. */
24674 if (for_prologue
24675 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24676 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24677 return 0;
24678
24679 reg_base = 0;
24680 n_free = 0;
24681 if (!for_prologue)
24682 {
24683 size = arm_size_return_regs ();
24684 reg_base = ARM_NUM_INTS (size);
24685 live_regs_mask >>= reg_base;
24686 }
24687
24688 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24689 && (for_prologue || call_used_regs[reg_base + n_free]))
24690 {
24691 live_regs_mask >>= 1;
24692 n_free++;
24693 }
24694
24695 if (n_free == 0)
24696 return 0;
24697 gcc_assert (amount / 4 * 4 == amount);
24698
24699 if (amount >= 512 && (amount - n_free * 4) < 512)
24700 return (amount - 508) / 4;
24701 if (amount <= n_free * 4)
24702 return amount / 4;
24703 return 0;
24704 }
24705
24706 /* The bits which aren't usefully expanded as rtl. */
24707 const char *
24708 thumb1_unexpanded_epilogue (void)
24709 {
24710 arm_stack_offsets *offsets;
24711 int regno;
24712 unsigned long live_regs_mask = 0;
24713 int high_regs_pushed = 0;
24714 int extra_pop;
24715 int had_to_push_lr;
24716 int size;
24717
24718 if (cfun->machine->return_used_this_function != 0)
24719 return "";
24720
24721 if (IS_NAKED (arm_current_func_type ()))
24722 return "";
24723
24724 offsets = arm_get_frame_offsets ();
24725 live_regs_mask = offsets->saved_regs_mask;
24726 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24727
24728 /* Deduce the registers used from the function's return value.
24729 This is more reliable than examining df_regs_ever_live_p () because that
24730 will be set if the register is ever used in the function, not just if
24731 the register is used to hold a return value. */
24732 size = arm_size_return_regs ();
24733
24734 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24735 if (extra_pop > 0)
24736 {
24737 unsigned long extra_mask = (1 << extra_pop) - 1;
24738 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24739 }
24740
24741 /* The prologue may have pushed some high registers to use as
24742 work registers. E.g. the testsuite file:
24743 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24744 compiles to produce:
24745 push {r4, r5, r6, r7, lr}
24746 mov r7, r9
24747 mov r6, r8
24748 push {r6, r7}
24749 as part of the prologue. We have to undo that pushing here. */
24750
24751 if (high_regs_pushed)
24752 {
24753 unsigned long mask = live_regs_mask & 0xff;
24754 int next_hi_reg;
24755
24756 /* The available low registers depend on the size of the value we are
24757 returning. */
24758 if (size <= 12)
24759 mask |= 1 << 3;
24760 if (size <= 8)
24761 mask |= 1 << 2;
24762
24763 if (mask == 0)
24764 /* Oh dear! We have no low registers into which we can pop
24765 high registers! */
24766 internal_error
24767 ("no low registers available for popping high registers");
24768
24769 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24770 if (live_regs_mask & (1 << next_hi_reg))
24771 break;
24772
24773 while (high_regs_pushed)
24774 {
24775 /* Find lo register(s) into which the high register(s) can
24776 be popped. */
24777 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24778 {
24779 if (mask & (1 << regno))
24780 high_regs_pushed--;
24781 if (high_regs_pushed == 0)
24782 break;
24783 }
24784
24785 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24786
24787 /* Pop the values into the low register(s). */
24788 thumb_pop (asm_out_file, mask);
24789
24790 /* Move the value(s) into the high registers. */
24791 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24792 {
24793 if (mask & (1 << regno))
24794 {
24795 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24796 regno);
24797
24798 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24799 if (live_regs_mask & (1 << next_hi_reg))
24800 break;
24801 }
24802 }
24803 }
24804 live_regs_mask &= ~0x0f00;
24805 }
24806
24807 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24808 live_regs_mask &= 0xff;
24809
24810 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24811 {
24812 /* Pop the return address into the PC. */
24813 if (had_to_push_lr)
24814 live_regs_mask |= 1 << PC_REGNUM;
24815
24816 /* Either no argument registers were pushed or a backtrace
24817 structure was created which includes an adjusted stack
24818 pointer, so just pop everything. */
24819 if (live_regs_mask)
24820 thumb_pop (asm_out_file, live_regs_mask);
24821
24822 /* We have either just popped the return address into the
24823 PC or it was kept in LR for the entire function.
24824 Note that thumb_pop has already called thumb_exit if the
24825 PC was in the list. */
24826 if (!had_to_push_lr)
24827 thumb_exit (asm_out_file, LR_REGNUM);
24828 }
24829 else
24830 {
24831 /* Pop everything but the return address. */
24832 if (live_regs_mask)
24833 thumb_pop (asm_out_file, live_regs_mask);
24834
24835 if (had_to_push_lr)
24836 {
24837 if (size > 12)
24838 {
24839 /* We have no free low regs, so save one. */
24840 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24841 LAST_ARG_REGNUM);
24842 }
24843
24844 /* Get the return address into a temporary register. */
24845 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24846
24847 if (size > 12)
24848 {
24849 /* Move the return address to lr. */
24850 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24851 LAST_ARG_REGNUM);
24852 /* Restore the low register. */
24853 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24854 IP_REGNUM);
24855 regno = LR_REGNUM;
24856 }
24857 else
24858 regno = LAST_ARG_REGNUM;
24859 }
24860 else
24861 regno = LR_REGNUM;
24862
24863 /* Remove the argument registers that were pushed onto the stack. */
24864 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24865 SP_REGNUM, SP_REGNUM,
24866 crtl->args.pretend_args_size);
24867
24868 thumb_exit (asm_out_file, regno);
24869 }
24870
24871 return "";
24872 }
24873
24874 /* Functions to save and restore machine-specific function data. */
24875 static struct machine_function *
24876 arm_init_machine_status (void)
24877 {
24878 struct machine_function *machine;
24879 machine = ggc_cleared_alloc<machine_function> ();
24880
24881 #if ARM_FT_UNKNOWN != 0
24882 machine->func_type = ARM_FT_UNKNOWN;
24883 #endif
24884 return machine;
24885 }
24886
24887 /* Return an RTX indicating where the return address to the
24888 calling function can be found. */
24889 rtx
24890 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24891 {
24892 if (count != 0)
24893 return NULL_RTX;
24894
24895 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24896 }
24897
24898 /* Do anything needed before RTL is emitted for each function. */
24899 void
24900 arm_init_expanders (void)
24901 {
24902 /* Arrange to initialize and mark the machine per-function status. */
24903 init_machine_status = arm_init_machine_status;
24904
24905 /* This is to stop the combine pass optimizing away the alignment
24906 adjustment of va_arg. */
24907 /* ??? It is claimed that this should not be necessary. */
24908 if (cfun)
24909 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24910 }
24911
24912 /* Return true if FUNC is compiled in a different (ARM/Thumb) mode from the one currently in effect. */
24913
24914 bool
24915 arm_change_mode_p (tree func)
24916 {
24917 if (TREE_CODE (func) != FUNCTION_DECL)
24918 return false;
24919
24920 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24921
24922 if (!callee_tree)
24923 callee_tree = target_option_default_node;
24924
24925 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24926 int flags = callee_opts->x_target_flags;
24927
24928 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24929 }
24930
24931 /* Like arm_compute_initial_elimination_offset. Simpler because there
24932 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24933 to point at the base of the local variables after static stack
24934 space for a function has been allocated. */
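/* Each eliminable register corresponds to one field of
   arm_stack_offsets (ARG_POINTER -> saved_args, FRAME_POINTER ->
   soft_frame, ARM_HARD_FRAME_POINTER -> saved_regs,
   THUMB_HARD_FRAME_POINTER -> locals_base, STACK_POINTER ->
   outgoing_args), so each offset returned below is simply the
   difference between the two corresponding fields.  */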
24935
24936 HOST_WIDE_INT
24937 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24938 {
24939 arm_stack_offsets *offsets;
24940
24941 offsets = arm_get_frame_offsets ();
24942
24943 switch (from)
24944 {
24945 case ARG_POINTER_REGNUM:
24946 switch (to)
24947 {
24948 case STACK_POINTER_REGNUM:
24949 return offsets->outgoing_args - offsets->saved_args;
24950
24951 case FRAME_POINTER_REGNUM:
24952 return offsets->soft_frame - offsets->saved_args;
24953
24954 case ARM_HARD_FRAME_POINTER_REGNUM:
24955 return offsets->saved_regs - offsets->saved_args;
24956
24957 case THUMB_HARD_FRAME_POINTER_REGNUM:
24958 return offsets->locals_base - offsets->saved_args;
24959
24960 default:
24961 gcc_unreachable ();
24962 }
24963 break;
24964
24965 case FRAME_POINTER_REGNUM:
24966 switch (to)
24967 {
24968 case STACK_POINTER_REGNUM:
24969 return offsets->outgoing_args - offsets->soft_frame;
24970
24971 case ARM_HARD_FRAME_POINTER_REGNUM:
24972 return offsets->saved_regs - offsets->soft_frame;
24973
24974 case THUMB_HARD_FRAME_POINTER_REGNUM:
24975 return offsets->locals_base - offsets->soft_frame;
24976
24977 default:
24978 gcc_unreachable ();
24979 }
24980 break;
24981
24982 default:
24983 gcc_unreachable ();
24984 }
24985 }
24986
24987 /* Generate the function's prologue. */
24988
24989 void
24990 thumb1_expand_prologue (void)
24991 {
24992 rtx_insn *insn;
24993
24994 HOST_WIDE_INT amount;
24995 HOST_WIDE_INT size;
24996 arm_stack_offsets *offsets;
24997 unsigned long func_type;
24998 int regno;
24999 unsigned long live_regs_mask;
25000 unsigned long l_mask;
25001 unsigned high_regs_pushed = 0;
25002 bool lr_needs_saving;
25003
25004 func_type = arm_current_func_type ();
25005
25006 /* Naked functions don't have prologues. */
25007 if (IS_NAKED (func_type))
25008 {
25009 if (flag_stack_usage_info)
25010 current_function_static_stack_size = 0;
25011 return;
25012 }
25013
25014 if (IS_INTERRUPT (func_type))
25015 {
25016 error ("interrupt service routines cannot be coded in Thumb mode");
25017 return;
25018 }
25019
25020 if (is_called_in_ARM_mode (current_function_decl))
25021 emit_insn (gen_prologue_thumb1_interwork ());
25022
25023 offsets = arm_get_frame_offsets ();
25024 live_regs_mask = offsets->saved_regs_mask;
25025 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25026
25027 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25028 l_mask = live_regs_mask & 0x40ff;
25029 /* Then count how many other high registers will need to be pushed. */
25030 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25031
25032 if (crtl->args.pretend_args_size)
25033 {
25034 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25035
25036 if (cfun->machine->uses_anonymous_args)
25037 {
25038 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25039 unsigned long mask;
25040
25041 mask = 1ul << (LAST_ARG_REGNUM + 1);
25042 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25043
25044 insn = thumb1_emit_multi_reg_push (mask, 0);
25045 }
25046 else
25047 {
25048 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25049 stack_pointer_rtx, x));
25050 }
25051 RTX_FRAME_RELATED_P (insn) = 1;
25052 }
25053
25054 if (TARGET_BACKTRACE)
25055 {
25056 HOST_WIDE_INT offset = 0;
25057 unsigned work_register;
25058 rtx work_reg, x, arm_hfp_rtx;
25059
25060 /* We have been asked to create a stack backtrace structure.
25061 The code looks like this:
25062
25063 0 .align 2
25064 0 func:
25065 0 sub SP, #16 Reserve space for 4 registers.
25066 2 push {R7} Push low registers.
25067 4 add R7, SP, #20 Get the stack pointer before the push.
25068 6 str R7, [SP, #8] Store the stack pointer
25069 (before reserving the space).
25070 8 mov R7, PC Get hold of the start of this code + 12.
25071 10 str R7, [SP, #16] Store it.
25072 12 mov R7, FP Get hold of the current frame pointer.
25073 14 str R7, [SP, #4] Store it.
25074 16 mov R7, LR Get hold of the current return address.
25075 18 str R7, [SP, #12] Store it.
25076 20 add R7, SP, #16 Point at the start of the
25077 backtrace structure.
25078 22 mov FP, R7 Put this value into the frame pointer. */
25079
25080 work_register = thumb_find_work_register (live_regs_mask);
25081 work_reg = gen_rtx_REG (SImode, work_register);
25082 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25083
25084 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25085 stack_pointer_rtx, GEN_INT (-16)));
25086 RTX_FRAME_RELATED_P (insn) = 1;
25087
25088 if (l_mask)
25089 {
25090 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25091 RTX_FRAME_RELATED_P (insn) = 1;
25092 lr_needs_saving = false;
25093
25094 offset = bit_count (l_mask) * UNITS_PER_WORD;
25095 }
25096
25097 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25098 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25099
25100 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25101 x = gen_frame_mem (SImode, x);
25102 emit_move_insn (x, work_reg);
25103
25104 /* Make sure that the instruction fetching the PC is in the right place
25105 to calculate "start of backtrace creation code + 12". */
25106 /* ??? The stores using the common WORK_REG ought to be enough to
25107 prevent the scheduler from doing anything weird. Failing that
25108 we could always move all of the following into an UNSPEC_VOLATILE. */
25109 if (l_mask)
25110 {
25111 x = gen_rtx_REG (SImode, PC_REGNUM);
25112 emit_move_insn (work_reg, x);
25113
25114 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25115 x = gen_frame_mem (SImode, x);
25116 emit_move_insn (x, work_reg);
25117
25118 emit_move_insn (work_reg, arm_hfp_rtx);
25119
25120 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25121 x = gen_frame_mem (SImode, x);
25122 emit_move_insn (x, work_reg);
25123 }
25124 else
25125 {
25126 emit_move_insn (work_reg, arm_hfp_rtx);
25127
25128 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25129 x = gen_frame_mem (SImode, x);
25130 emit_move_insn (x, work_reg);
25131
25132 x = gen_rtx_REG (SImode, PC_REGNUM);
25133 emit_move_insn (work_reg, x);
25134
25135 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25136 x = gen_frame_mem (SImode, x);
25137 emit_move_insn (x, work_reg);
25138 }
25139
25140 x = gen_rtx_REG (SImode, LR_REGNUM);
25141 emit_move_insn (work_reg, x);
25142
25143 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25144 x = gen_frame_mem (SImode, x);
25145 emit_move_insn (x, work_reg);
25146
25147 x = GEN_INT (offset + 12);
25148 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25149
25150 emit_move_insn (arm_hfp_rtx, work_reg);
25151 }
25152 /* Optimization: If we are not pushing any low registers but we are going
25153 to push some high registers then delay our first push. This will just
25154 be a push of LR and we can combine it with the push of the first high
25155 register. */
25156 else if ((l_mask & 0xff) != 0
25157 || (high_regs_pushed == 0 && lr_needs_saving))
25158 {
25159 unsigned long mask = l_mask;
25160 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25161 insn = thumb1_emit_multi_reg_push (mask, mask);
25162 RTX_FRAME_RELATED_P (insn) = 1;
25163 lr_needs_saving = false;
25164 }
25165
25166 if (high_regs_pushed)
25167 {
25168 unsigned pushable_regs;
25169 unsigned next_hi_reg;
25170 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25171 : crtl->args.info.nregs;
25172 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25173
25174 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25175 if (live_regs_mask & (1 << next_hi_reg))
25176 break;
25177
25178 /* Here we need to mask out registers used for passing arguments, even
25179 if they could otherwise be pushed. Using them to stash the high
25180 registers could clobber argument values that are still live. */
25181 pushable_regs = l_mask & (~arg_regs_mask);
25182 if (lr_needs_saving)
25183 pushable_regs &= ~(1 << LR_REGNUM);
25184
25185 if (pushable_regs == 0)
25186 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25187
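/* Copy as many live high registers as will fit into the available low
   registers, push that batch, and repeat until every live high register
   has been saved.  */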
25188 while (high_regs_pushed > 0)
25189 {
25190 unsigned long real_regs_mask = 0;
25191 unsigned long push_mask = 0;
25192
25193 for (regno = LR_REGNUM; regno >= 0; regno --)
25194 {
25195 if (pushable_regs & (1 << regno))
25196 {
25197 emit_move_insn (gen_rtx_REG (SImode, regno),
25198 gen_rtx_REG (SImode, next_hi_reg));
25199
25200 high_regs_pushed --;
25201 real_regs_mask |= (1 << next_hi_reg);
25202 push_mask |= (1 << regno);
25203
25204 if (high_regs_pushed)
25205 {
25206 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25207 next_hi_reg --)
25208 if (live_regs_mask & (1 << next_hi_reg))
25209 break;
25210 }
25211 else
25212 break;
25213 }
25214 }
25215
25216 /* If we had to find a work register and we have not yet
25217 saved the LR then add it to the list of regs to push. */
25218 if (lr_needs_saving)
25219 {
25220 push_mask |= 1 << LR_REGNUM;
25221 real_regs_mask |= 1 << LR_REGNUM;
25222 lr_needs_saving = false;
25223 }
25224
25225 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25226 RTX_FRAME_RELATED_P (insn) = 1;
25227 }
25228 }
25229
25230 /* Load the pic register before setting the frame pointer,
25231 so we can use r7 as a temporary work register. */
25232 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25233 arm_load_pic_register (live_regs_mask);
25234
25235 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25236 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25237 stack_pointer_rtx);
25238
25239 size = offsets->outgoing_args - offsets->saved_args;
25240 if (flag_stack_usage_info)
25241 current_function_static_stack_size = size;
25242
25243 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25244 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25245 || flag_stack_clash_protection)
25246 && size)
25247 sorry ("-fstack-check=specific for Thumb-1");
25248
25249 amount = offsets->outgoing_args - offsets->saved_regs;
25250 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25251 if (amount)
25252 {
25253 if (amount < 512)
25254 {
25255 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25256 GEN_INT (- amount)));
25257 RTX_FRAME_RELATED_P (insn) = 1;
25258 }
25259 else
25260 {
25261 rtx reg, dwarf;
25262
25263 /* The stack decrement is too big for an immediate value in a single
25264 insn. In theory we could issue multiple subtracts, but after
25265 three of them it becomes more space efficient to place the full
25266 value in the constant pool and load into a register. (Also the
25267 ARM debugger really likes to see only one stack decrement per
25268 function). So instead we look for a scratch register into which
25269 we can load the decrement, and then we subtract this from the
25270 stack pointer. Unfortunately on the thumb the only available
25271 scratch registers are the argument registers, and we cannot use
25272 these as they may hold arguments to the function. Instead we
25273 attempt to locate a call preserved register which is used by this
25274 function. If we can find one, then we know that it will have
25275 been pushed at the start of the prologue and so we can corrupt
25276 it now. */
25277 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25278 if (live_regs_mask & (1 << regno))
25279 break;
25280
25281 gcc_assert (regno <= LAST_LO_REGNUM);
25282
25283 reg = gen_rtx_REG (SImode, regno);
25284
25285 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25286
25287 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25288 stack_pointer_rtx, reg));
25289
25290 dwarf = gen_rtx_SET (stack_pointer_rtx,
25291 plus_constant (Pmode, stack_pointer_rtx,
25292 -amount));
25293 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25294 RTX_FRAME_RELATED_P (insn) = 1;
25295 }
25296 }
25297
25298 if (frame_pointer_needed)
25299 thumb_set_frame_pointer (offsets);
25300
25301 /* If we are profiling, make sure no instructions are scheduled before
25302 the call to mcount. Similarly if the user has requested no
25303 scheduling in the prologue. Similarly if we want non-call exceptions
25304 using the EABI unwinder, to prevent faulting instructions from being
25305 swapped with a stack adjustment. */
25306 if (crtl->profile || !TARGET_SCHED_PROLOG
25307 || (arm_except_unwind_info (&global_options) == UI_TARGET
25308 && cfun->can_throw_non_call_exceptions))
25309 emit_insn (gen_blockage ());
25310
25311 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25312 if (live_regs_mask & 0xff)
25313 cfun->machine->lr_save_eliminated = 0;
25314 }
25315
25316 /* Clear caller saved registers not used to pass return values and leaked
25317 condition flags before exiting a cmse_nonsecure_entry function. */
25318
25319 void
25320 cmse_nonsecure_entry_clear_before_return (void)
25321 {
25322 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25323 uint32_t padding_bits_to_clear = 0;
25324 auto_sbitmap to_clear_bitmap (maxregno + 1);
25325 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25326 tree result_type;
25327
25328 bitmap_clear (to_clear_bitmap);
25329 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25330 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25331
25332 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25333 registers. */
25334 if (TARGET_HARD_FLOAT)
25335 {
25336 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25337
25338 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25339
25340 /* Make sure we don't clear the two scratch registers used to clear the
25341 relevant FPSCR bits in output_return_instruction. */
25342 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25343 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25344 emit_use (gen_rtx_REG (SImode, 4));
25345 bitmap_clear_bit (to_clear_bitmap, 4);
25346 }
25347
25348 /* If the user has defined registers to be caller saved, these are no longer
25349 restored by the function before returning and must thus be cleared for
25350 security purposes. */
25351 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25352 {
25353 /* We do not touch registers that can be used to pass arguments as per
25354 the AAPCS, since these should never be made callee-saved by user
25355 options. */
25356 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25357 continue;
25358 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25359 continue;
25360 if (call_used_regs[regno])
25361 bitmap_set_bit (to_clear_bitmap, regno);
25362 }
25363
25364 /* Make sure we do not clear the registers used to return the result. */
25365 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25366 if (!VOID_TYPE_P (result_type))
25367 {
25368 uint64_t to_clear_return_mask;
25369 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25370
25371 /* No need to check that we return in registers, because we don't
25372 support returning on stack yet. */
25373 gcc_assert (REG_P (result_rtl));
25374 to_clear_return_mask
25375 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25376 &padding_bits_to_clear);
25377 if (to_clear_return_mask)
25378 {
25379 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25380 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25381 {
25382 if (to_clear_return_mask & (1ULL << regno))
25383 bitmap_clear_bit (to_clear_bitmap, regno);
25384 }
25385 }
25386 }
25387
25388 if (padding_bits_to_clear != 0)
25389 {
25390 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25391 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25392
25393 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25394 returning a composite type, which only uses r0. Let's make sure that
25395 r1-r3 are cleared too. */
25396 bitmap_clear (to_clear_arg_regs_bitmap);
25397 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25398 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25399 }
25400
25401 /* Clear full registers that leak before returning. */
25402 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25403 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25404 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25405 clearing_reg);
25406 }
25407
25408 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25409 single POP instruction can be generated. LR should be replaced by PC. All
25410 the checks required are already done by USE_RETURN_INSN (). Hence, all
25411 we really need to check here is whether a single register or multiple
25412 registers are to be popped. */
25413 void
25414 thumb2_expand_return (bool simple_return)
25415 {
25416 int i, num_regs;
25417 unsigned long saved_regs_mask;
25418 arm_stack_offsets *offsets;
25419
25420 offsets = arm_get_frame_offsets ();
25421 saved_regs_mask = offsets->saved_regs_mask;
25422
25423 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25424 if (saved_regs_mask & (1 << i))
25425 num_regs++;
25426
25427 if (!simple_return && saved_regs_mask)
25428 {
25429 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25430 functions or adapt code to handle according to ACLE. This path should
25431 not be reachable for cmse_nonsecure_entry functions though we prefer
25432 to assert it for now to ensure that future code changes do not silently
25433 change this behavior. */
25434 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25435 if (num_regs == 1)
25436 {
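/* Build a PARALLEL combining the return with a post-increment load of PC
   from the stack; this is matched as a single "pop {pc}" by the
   *pop_multiple_with_stack_update_and_return pattern.  */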
25437 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25438 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25439 rtx addr = gen_rtx_MEM (SImode,
25440 gen_rtx_POST_INC (SImode,
25441 stack_pointer_rtx));
25442 set_mem_alias_set (addr, get_frame_alias_set ());
25443 XVECEXP (par, 0, 0) = ret_rtx;
25444 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25445 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25446 emit_jump_insn (par);
25447 }
25448 else
25449 {
25450 saved_regs_mask &= ~ (1 << LR_REGNUM);
25451 saved_regs_mask |= (1 << PC_REGNUM);
25452 arm_emit_multi_reg_pop (saved_regs_mask);
25453 }
25454 }
25455 else
25456 {
25457 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25458 cmse_nonsecure_entry_clear_before_return ();
25459 emit_jump_insn (simple_return_rtx);
25460 }
25461 }
25462
25463 void
25464 thumb1_expand_epilogue (void)
25465 {
25466 HOST_WIDE_INT amount;
25467 arm_stack_offsets *offsets;
25468 int regno;
25469
25470 /* Naked functions don't have prologues. */
25471 if (IS_NAKED (arm_current_func_type ()))
25472 return;
25473
25474 offsets = arm_get_frame_offsets ();
25475 amount = offsets->outgoing_args - offsets->saved_regs;
25476
25477 if (frame_pointer_needed)
25478 {
25479 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25480 amount = offsets->locals_base - offsets->saved_regs;
25481 }
25482 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25483
25484 gcc_assert (amount >= 0);
25485 if (amount)
25486 {
25487 emit_insn (gen_blockage ());
25488
25489 if (amount < 512)
25490 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25491 GEN_INT (amount)));
25492 else
25493 {
25494 /* r3 is always free in the epilogue. */
25495 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25496
25497 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25498 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25499 }
25500 }
25501
25502 /* Emit a USE (stack_pointer_rtx), so that
25503 the stack adjustment will not be deleted. */
25504 emit_insn (gen_force_register_use (stack_pointer_rtx));
25505
25506 if (crtl->profile || !TARGET_SCHED_PROLOG)
25507 emit_insn (gen_blockage ());
25508
25509 /* Emit a clobber for each register that will be restored in the epilogue,
25510 so that flow2 will get register lifetimes correct. */
25511 for (regno = 0; regno < 13; regno++)
25512 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25513 emit_clobber (gen_rtx_REG (SImode, regno));
25514
25515 if (! df_regs_ever_live_p (LR_REGNUM))
25516 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25517
25518 /* Clear all caller-saved regs that are not used to return. */
25519 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25520 cmse_nonsecure_entry_clear_before_return ();
25521 }
25522
25523 /* Epilogue code for APCS frame. */
25524 static void
25525 arm_expand_epilogue_apcs_frame (bool really_return)
25526 {
25527 unsigned long func_type;
25528 unsigned long saved_regs_mask;
25529 int num_regs = 0;
25530 int i;
25531 int floats_from_frame = 0;
25532 arm_stack_offsets *offsets;
25533
25534 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25535 func_type = arm_current_func_type ();
25536
25537 /* Get frame offsets for ARM. */
25538 offsets = arm_get_frame_offsets ();
25539 saved_regs_mask = offsets->saved_regs_mask;
25540
25541 /* Find the offset of the floating-point save area in the frame. */
25542 floats_from_frame
25543 = (offsets->saved_args
25544 + arm_compute_static_chain_stack_bytes ()
25545 - offsets->frame);
25546
25547 /* Compute how many core registers are saved and how far away the floats are. */
25548 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25549 if (saved_regs_mask & (1 << i))
25550 {
25551 num_regs++;
25552 floats_from_frame += 4;
25553 }
25554
25555 if (TARGET_HARD_FLOAT)
25556 {
25557 int start_reg;
25558 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25559
25560 /* The offset is from IP_REGNUM. */
25561 int saved_size = arm_get_vfp_saved_size ();
25562 if (saved_size > 0)
25563 {
25564 rtx_insn *insn;
25565 floats_from_frame += saved_size;
25566 insn = emit_insn (gen_addsi3 (ip_rtx,
25567 hard_frame_pointer_rtx,
25568 GEN_INT (-floats_from_frame)));
25569 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25570 ip_rtx, hard_frame_pointer_rtx);
25571 }
25572
25573 /* Generate VFP register multi-pop. */
25574 start_reg = FIRST_VFP_REGNUM;
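/* START_REG tracks the first register of the current run of consecutive
   registers that still need restoring; each gap in the set of live
   registers flushes the run with a single vldm.  */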
25575
25576 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25577 /* Look for a case where a reg does not need restoring. */
25578 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25579 && (!df_regs_ever_live_p (i + 1)
25580 || call_used_regs[i + 1]))
25581 {
25582 if (start_reg != i)
25583 arm_emit_vfp_multi_reg_pop (start_reg,
25584 (i - start_reg) / 2,
25585 gen_rtx_REG (SImode,
25586 IP_REGNUM));
25587 start_reg = i + 2;
25588 }
25589
25590 /* Restore the remaining regs that we have discovered (or possibly
25591 even all of them, if the conditional in the for loop never
25592 fired). */
25593 if (start_reg != i)
25594 arm_emit_vfp_multi_reg_pop (start_reg,
25595 (i - start_reg) / 2,
25596 gen_rtx_REG (SImode, IP_REGNUM));
25597 }
25598
25599 if (TARGET_IWMMXT)
25600 {
25601 /* The frame pointer is guaranteed to be non-double-word aligned, as
25602 it is set to double-word-aligned old_stack_pointer - 4. */
25603 rtx_insn *insn;
25604 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25605
25606 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25607 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25608 {
25609 rtx addr = gen_frame_mem (V2SImode,
25610 plus_constant (Pmode, hard_frame_pointer_rtx,
25611 - lrm_count * 4));
25612 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25613 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25614 gen_rtx_REG (V2SImode, i),
25615 NULL_RTX);
25616 lrm_count += 2;
25617 }
25618 }
25619
25620 /* saved_regs_mask should contain IP, which holds the old stack pointer
25621 from the time the activation record was created. Since SP and IP are
25622 adjacent registers, we can restore the value directly into SP. */
25623 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25624 saved_regs_mask &= ~(1 << IP_REGNUM);
25625 saved_regs_mask |= (1 << SP_REGNUM);
25626
25627 /* There are two registers left in saved_regs_mask - LR and PC. We
25628 only need to restore LR (the return address), but to
25629 save time we can load it directly into PC, unless we need a
25630 special function exit sequence, or we are not really returning. */
25631 if (really_return
25632 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25633 && !crtl->calls_eh_return)
25634 /* Delete LR from the register mask, so that LR on
25635 the stack is loaded into the PC in the register mask. */
25636 saved_regs_mask &= ~(1 << LR_REGNUM);
25637 else
25638 saved_regs_mask &= ~(1 << PC_REGNUM);
25639
25640 num_regs = bit_count (saved_regs_mask);
25641 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25642 {
25643 rtx_insn *insn;
25644 emit_insn (gen_blockage ());
25645 /* Unwind the stack to just below the saved registers. */
25646 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25647 hard_frame_pointer_rtx,
25648 GEN_INT (- 4 * num_regs)));
25649
25650 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25651 stack_pointer_rtx, hard_frame_pointer_rtx);
25652 }
25653
25654 arm_emit_multi_reg_pop (saved_regs_mask);
25655
25656 if (IS_INTERRUPT (func_type))
25657 {
25658 /* Interrupt handlers will have pushed the
25659 IP onto the stack, so restore it now. */
25660 rtx_insn *insn;
25661 rtx addr = gen_rtx_MEM (SImode,
25662 gen_rtx_POST_INC (SImode,
25663 stack_pointer_rtx));
25664 set_mem_alias_set (addr, get_frame_alias_set ());
25665 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25666 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25667 gen_rtx_REG (SImode, IP_REGNUM),
25668 NULL_RTX);
25669 }
25670
25671 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25672 return;
25673
25674 if (crtl->calls_eh_return)
25675 emit_insn (gen_addsi3 (stack_pointer_rtx,
25676 stack_pointer_rtx,
25677 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25678
25679 if (IS_STACKALIGN (func_type))
25680 /* Restore the original stack pointer. Before prologue, the stack was
25681 realigned and the original stack pointer saved in r0. For details,
25682 see comment in arm_expand_prologue. */
25683 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25684
25685 emit_jump_insn (simple_return_rtx);
25686 }
25687
25688 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25689 function is not a sibcall. */
25690 void
25691 arm_expand_epilogue (bool really_return)
25692 {
25693 unsigned long func_type;
25694 unsigned long saved_regs_mask;
25695 int num_regs = 0;
25696 int i;
25697 int amount;
25698 arm_stack_offsets *offsets;
25699
25700 func_type = arm_current_func_type ();
25701
25702 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25703 and let output_return_instruction take care of any instruction emission. */
25704 if (IS_NAKED (func_type)
25705 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25706 {
25707 if (really_return)
25708 emit_jump_insn (simple_return_rtx);
25709 return;
25710 }
25711
25712 /* If we are throwing an exception, then we really must be doing a
25713 return, so we can't tail-call. */
25714 gcc_assert (!crtl->calls_eh_return || really_return);
25715
25716 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25717 {
25718 arm_expand_epilogue_apcs_frame (really_return);
25719 return;
25720 }
25721
25722 /* Get frame offsets for ARM. */
25723 offsets = arm_get_frame_offsets ();
25724 saved_regs_mask = offsets->saved_regs_mask;
25725 num_regs = bit_count (saved_regs_mask);
25726
25727 if (frame_pointer_needed)
25728 {
25729 rtx_insn *insn;
25730 /* Restore stack pointer if necessary. */
25731 if (TARGET_ARM)
25732 {
25733 /* In ARM mode, the frame pointer points to the first saved register.
25734 Restore the stack pointer to the last saved register. */
25735 amount = offsets->frame - offsets->saved_regs;
25736
25737 /* Force out any pending memory operations that reference stacked data
25738 before stack de-allocation occurs. */
25739 emit_insn (gen_blockage ());
25740 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25741 hard_frame_pointer_rtx,
25742 GEN_INT (amount)));
25743 arm_add_cfa_adjust_cfa_note (insn, amount,
25744 stack_pointer_rtx,
25745 hard_frame_pointer_rtx);
25746
25747 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25748 deleted. */
25749 emit_insn (gen_force_register_use (stack_pointer_rtx));
25750 }
25751 else
25752 {
25753 /* In Thumb-2 mode, the frame pointer points to the last saved
25754 register. */
25755 amount = offsets->locals_base - offsets->saved_regs;
25756 if (amount)
25757 {
25758 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25759 hard_frame_pointer_rtx,
25760 GEN_INT (amount)));
25761 arm_add_cfa_adjust_cfa_note (insn, amount,
25762 hard_frame_pointer_rtx,
25763 hard_frame_pointer_rtx);
25764 }
25765
25766 /* Force out any pending memory operations that reference stacked data
25767 before stack de-allocation occurs. */
25768 emit_insn (gen_blockage ());
25769 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25770 hard_frame_pointer_rtx));
25771 arm_add_cfa_adjust_cfa_note (insn, 0,
25772 stack_pointer_rtx,
25773 hard_frame_pointer_rtx);
25774 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25775 deleted. */
25776 emit_insn (gen_force_register_use (stack_pointer_rtx));
25777 }
25778 }
25779 else
25780 {
25781 /* Pop off outgoing args and local frame to adjust stack pointer to
25782 last saved register. */
25783 amount = offsets->outgoing_args - offsets->saved_regs;
25784 if (amount)
25785 {
25786 rtx_insn *tmp;
25787 /* Force out any pending memory operations that reference stacked data
25788 before stack de-allocation occurs. */
25789 emit_insn (gen_blockage ());
25790 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25791 stack_pointer_rtx,
25792 GEN_INT (amount)));
25793 arm_add_cfa_adjust_cfa_note (tmp, amount,
25794 stack_pointer_rtx, stack_pointer_rtx);
25795 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25796 not deleted. */
25797 emit_insn (gen_force_register_use (stack_pointer_rtx));
25798 }
25799 }
25800
25801 if (TARGET_HARD_FLOAT)
25802 {
25803 /* Generate VFP register multi-pop. */
25804 int end_reg = LAST_VFP_REGNUM + 1;
25805
25806 /* Scan the registers in reverse order. We need to match
25807 any groupings made in the prologue and generate matching
25808 vldm operations. The need to match groups is because,
25809 unlike pop, vldm can only do consecutive regs. */
25810 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25811 /* Look for a case where a reg does not need restoring. */
25812 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25813 && (!df_regs_ever_live_p (i + 1)
25814 || call_used_regs[i + 1]))
25815 {
25816 /* Restore the regs discovered so far (from reg+2 to
25817 end_reg). */
25818 if (end_reg > i + 2)
25819 arm_emit_vfp_multi_reg_pop (i + 2,
25820 (end_reg - (i + 2)) / 2,
25821 stack_pointer_rtx);
25822 end_reg = i;
25823 }
25824
25825 /* Restore the remaining regs that we have discovered (or possibly
25826 even all of them, if the conditional in the for loop never
25827 fired). */
25828 if (end_reg > i + 2)
25829 arm_emit_vfp_multi_reg_pop (i + 2,
25830 (end_reg - (i + 2)) / 2,
25831 stack_pointer_rtx);
25832 }
25833
25834 if (TARGET_IWMMXT)
25835 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25836 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25837 {
25838 rtx_insn *insn;
25839 rtx addr = gen_rtx_MEM (V2SImode,
25840 gen_rtx_POST_INC (SImode,
25841 stack_pointer_rtx));
25842 set_mem_alias_set (addr, get_frame_alias_set ());
25843 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25844 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25845 gen_rtx_REG (V2SImode, i),
25846 NULL_RTX);
25847 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25848 stack_pointer_rtx, stack_pointer_rtx);
25849 }
25850
25851 if (saved_regs_mask)
25852 {
25853 rtx insn;
25854 bool return_in_pc = false;
25855
25856 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25857 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25858 && !IS_CMSE_ENTRY (func_type)
25859 && !IS_STACKALIGN (func_type)
25860 && really_return
25861 && crtl->args.pretend_args_size == 0
25862 && saved_regs_mask & (1 << LR_REGNUM)
25863 && !crtl->calls_eh_return)
25864 {
25865 saved_regs_mask &= ~(1 << LR_REGNUM);
25866 saved_regs_mask |= (1 << PC_REGNUM);
25867 return_in_pc = true;
25868 }
25869
25870 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25871 {
25872 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25873 if (saved_regs_mask & (1 << i))
25874 {
25875 rtx addr = gen_rtx_MEM (SImode,
25876 gen_rtx_POST_INC (SImode,
25877 stack_pointer_rtx));
25878 set_mem_alias_set (addr, get_frame_alias_set ());
25879
25880 if (i == PC_REGNUM)
25881 {
25882 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25883 XVECEXP (insn, 0, 0) = ret_rtx;
25884 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25885 addr);
25886 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25887 insn = emit_jump_insn (insn);
25888 }
25889 else
25890 {
25891 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25892 addr));
25893 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25894 gen_rtx_REG (SImode, i),
25895 NULL_RTX);
25896 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25897 stack_pointer_rtx,
25898 stack_pointer_rtx);
25899 }
25900 }
25901 }
25902 else
25903 {
25904 if (TARGET_LDRD
25905 && current_tune->prefer_ldrd_strd
25906 && !optimize_function_for_size_p (cfun))
25907 {
25908 if (TARGET_THUMB2)
25909 thumb2_emit_ldrd_pop (saved_regs_mask);
25910 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25911 arm_emit_ldrd_pop (saved_regs_mask);
25912 else
25913 arm_emit_multi_reg_pop (saved_regs_mask);
25914 }
25915 else
25916 arm_emit_multi_reg_pop (saved_regs_mask);
25917 }
25918
25919 if (return_in_pc)
25920 return;
25921 }
25922
25923 amount
25924 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25925 if (amount)
25926 {
25927 int i, j;
25928 rtx dwarf = NULL_RTX;
25929 rtx_insn *tmp =
25930 emit_insn (gen_addsi3 (stack_pointer_rtx,
25931 stack_pointer_rtx,
25932 GEN_INT (amount)));
25933
25934 RTX_FRAME_RELATED_P (tmp) = 1;
25935
25936 if (cfun->machine->uses_anonymous_args)
25937 {
25938 /* Restore pretend args. Refer to arm_expand_prologue for how the
25939 pretend args are saved on the stack. */
25940 int num_regs = crtl->args.pretend_args_size / 4;
25941 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
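/* E.g. for two pretend words this is (0xf0 >> 2) & 0xf = 0xc, i.e.
   {r2, r3}: the highest NUM_REGS argument registers, which is what the
   prologue pushed.  */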
25942 for (j = 0, i = 0; j < num_regs; i++)
25943 if (saved_regs_mask & (1 << i))
25944 {
25945 rtx reg = gen_rtx_REG (SImode, i);
25946 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25947 j++;
25948 }
25949 REG_NOTES (tmp) = dwarf;
25950 }
25951 arm_add_cfa_adjust_cfa_note (tmp, amount,
25952 stack_pointer_rtx, stack_pointer_rtx);
25953 }
25954
25955 /* Clear all caller-saved regs that are not used to return. */
25956 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25957 {
25958 /* CMSE_ENTRY always returns. */
25959 gcc_assert (really_return);
25960 cmse_nonsecure_entry_clear_before_return ();
25961 }
25962
25963 if (!really_return)
25964 return;
25965
25966 if (crtl->calls_eh_return)
25967 emit_insn (gen_addsi3 (stack_pointer_rtx,
25968 stack_pointer_rtx,
25969 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25970
25971 if (IS_STACKALIGN (func_type))
25972 /* Restore the original stack pointer. Before prologue, the stack was
25973 realigned and the original stack pointer saved in r0. For details,
25974 see comment in arm_expand_prologue. */
25975 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25976
25977 emit_jump_insn (simple_return_rtx);
25978 }
25979
25980 /* Implementation of insn prologue_thumb1_interwork. This is the first
25981 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25982
25983 const char *
25984 thumb1_output_interwork (void)
25985 {
25986 const char * name;
25987 FILE *f = asm_out_file;
25988
25989 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25990 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25991 == SYMBOL_REF);
25992 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25993
25994 /* Generate code sequence to switch us into Thumb mode. */
25995 /* The .code 32 directive has already been emitted by
25996 ASM_DECLARE_FUNCTION_NAME. */
25997 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25998 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
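/* Bit 0 of the branch target selects the instruction set, so the
   "orr ip, pc, #1" / "bx ip" pair resumes execution at the following
   instruction in Thumb state.  */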
25999
26000 /* Generate a label, so that the debugger will notice the
26001 change in instruction sets. This label is also used by
26002 the assembler to bypass the ARM code when this function
26003 is called from a Thumb encoded function elsewhere in the
26004 same file. Hence the definition of STUB_NAME here must
26005 agree with the definition in gas/config/tc-arm.c. */
26006
26007 #define STUB_NAME ".real_start_of"
26008
26009 fprintf (f, "\t.code\t16\n");
26010 #ifdef ARM_PE
26011 if (arm_dllexport_name_p (name))
26012 name = arm_strip_name_encoding (name);
26013 #endif
26014 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26015 fprintf (f, "\t.thumb_func\n");
26016 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26017
26018 return "";
26019 }
26020
26021 /* Handle the case of a double word load into a low register from
26022 a computed memory address. The computed address may involve a
26023 register which is overwritten by the load. */
26024 const char *
26025 thumb_load_double_from_address (rtx *operands)
26026 {
26027 rtx addr;
26028 rtx base;
26029 rtx offset;
26030 rtx arg1;
26031 rtx arg2;
26032
26033 gcc_assert (REG_P (operands[0]));
26034 gcc_assert (MEM_P (operands[1]));
26035
26036 /* Get the memory address. */
26037 addr = XEXP (operands[1], 0);
26038
26039 /* Work out how the memory address is computed. */
26040 switch (GET_CODE (addr))
26041 {
26042 case REG:
26043 operands[2] = adjust_address (operands[1], SImode, 4);
26044
26045 if (REGNO (operands[0]) == REGNO (addr))
26046 {
26047 output_asm_insn ("ldr\t%H0, %2", operands);
26048 output_asm_insn ("ldr\t%0, %1", operands);
26049 }
26050 else
26051 {
26052 output_asm_insn ("ldr\t%0, %1", operands);
26053 output_asm_insn ("ldr\t%H0, %2", operands);
26054 }
26055 break;
26056
26057 case CONST:
26058 /* Compute <address> + 4 for the high order load. */
26059 operands[2] = adjust_address (operands[1], SImode, 4);
26060
26061 output_asm_insn ("ldr\t%0, %1", operands);
26062 output_asm_insn ("ldr\t%H0, %2", operands);
26063 break;
26064
26065 case PLUS:
26066 arg1 = XEXP (addr, 0);
26067 arg2 = XEXP (addr, 1);
26068
26069 if (CONSTANT_P (arg1))
26070 base = arg2, offset = arg1;
26071 else
26072 base = arg1, offset = arg2;
26073
26074 gcc_assert (REG_P (base));
26075
26076 /* Catch the case of <address> = <reg> + <reg> */
26077 if (REG_P (offset))
26078 {
26079 int reg_offset = REGNO (offset);
26080 int reg_base = REGNO (base);
26081 int reg_dest = REGNO (operands[0]);
26082
26083 /* Add the base and offset registers together into the
26084 higher destination register. */
26085 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26086 reg_dest + 1, reg_base, reg_offset);
26087
26088 /* Load the lower destination register from the address in
26089 the higher destination register. */
26090 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26091 reg_dest, reg_dest + 1);
26092
26093 /* Load the higher destination register from its own address
26094 plus 4. */
26095 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26096 reg_dest + 1, reg_dest + 1);
26097 }
26098 else
26099 {
26100 /* Compute <address> + 4 for the high order load. */
26101 operands[2] = adjust_address (operands[1], SImode, 4);
26102
26103 /* If the computed address is held in the low order register
26104 then load the high order register first, otherwise always
26105 load the low order register first. */
26106 if (REGNO (operands[0]) == REGNO (base))
26107 {
26108 output_asm_insn ("ldr\t%H0, %2", operands);
26109 output_asm_insn ("ldr\t%0, %1", operands);
26110 }
26111 else
26112 {
26113 output_asm_insn ("ldr\t%0, %1", operands);
26114 output_asm_insn ("ldr\t%H0, %2", operands);
26115 }
26116 }
26117 break;
26118
26119 case LABEL_REF:
26120 /* With no registers to worry about we can just load the value
26121 directly. */
26122 operands[2] = adjust_address (operands[1], SImode, 4);
26123
26124 output_asm_insn ("ldr\t%H0, %2", operands);
26125 output_asm_insn ("ldr\t%0, %1", operands);
26126 break;
26127
26128 default:
26129 gcc_unreachable ();
26130 }
26131
26132 return "";
26133 }
26134
26135 const char *
26136 thumb_output_move_mem_multiple (int n, rtx *operands)
26137 {
26138 switch (n)
26139 {
26140 case 2:
26141 if (REGNO (operands[4]) > REGNO (operands[5]))
26142 std::swap (operands[4], operands[5]);
26143
26144 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26145 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26146 break;
26147
26148 case 3:
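/* Sort the three scratch registers into ascending order with a
   three-exchange network: ldmia/stmia require their register lists in
   increasing register number.  */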
26149 if (REGNO (operands[4]) > REGNO (operands[5]))
26150 std::swap (operands[4], operands[5]);
26151 if (REGNO (operands[5]) > REGNO (operands[6]))
26152 std::swap (operands[5], operands[6]);
26153 if (REGNO (operands[4]) > REGNO (operands[5]))
26154 std::swap (operands[4], operands[5]);
26155
26156 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26157 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26158 break;
26159
26160 default:
26161 gcc_unreachable ();
26162 }
26163
26164 return "";
26165 }
26166
26167 /* Output a call-via instruction for thumb state. */
26168 const char *
26169 thumb_call_via_reg (rtx reg)
26170 {
26171 int regno = REGNO (reg);
26172 rtx *labelp;
26173
26174 gcc_assert (regno < LR_REGNUM);
26175
26176 /* If we are in the normal text section we can use a single instance
26177 per compilation unit. If we are doing function sections, then we need
26178 an entry per section, since we can't rely on reachability. */
26179 if (in_section == text_section)
26180 {
26181 thumb_call_reg_needed = 1;
26182
26183 if (thumb_call_via_label[regno] == NULL)
26184 thumb_call_via_label[regno] = gen_label_rtx ();
26185 labelp = thumb_call_via_label + regno;
26186 }
26187 else
26188 {
26189 if (cfun->machine->call_via[regno] == NULL)
26190 cfun->machine->call_via[regno] = gen_label_rtx ();
26191 labelp = cfun->machine->call_via + regno;
26192 }
26193
26194 output_asm_insn ("bl\t%a0", labelp);
26195 return "";
26196 }
26197
26198 /* Routines for generating rtl. */
26199 void
26200 thumb_expand_movmemqi (rtx *operands)
26201 {
26202 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26203 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26204 HOST_WIDE_INT len = INTVAL (operands[2]);
26205 HOST_WIDE_INT offset = 0;
26206
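/* Copy 12- and 8-byte chunks first with the multi-register move patterns,
   then mop up the remaining bytes (at most 7) with word, halfword and
   byte moves at increasing offsets.  */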
26207 while (len >= 12)
26208 {
26209 emit_insn (gen_movmem12b (out, in, out, in));
26210 len -= 12;
26211 }
26212
26213 if (len >= 8)
26214 {
26215 emit_insn (gen_movmem8b (out, in, out, in));
26216 len -= 8;
26217 }
26218
26219 if (len >= 4)
26220 {
26221 rtx reg = gen_reg_rtx (SImode);
26222 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26223 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26224 len -= 4;
26225 offset += 4;
26226 }
26227
26228 if (len >= 2)
26229 {
26230 rtx reg = gen_reg_rtx (HImode);
26231 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26232 plus_constant (Pmode, in,
26233 offset))));
26234 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26235 offset)),
26236 reg));
26237 len -= 2;
26238 offset += 2;
26239 }
26240
26241 if (len)
26242 {
26243 rtx reg = gen_reg_rtx (QImode);
26244 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26245 plus_constant (Pmode, in,
26246 offset))));
26247 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26248 offset)),
26249 reg));
26250 }
26251 }
26252
26253 void
26254 thumb_reload_out_hi (rtx *operands)
26255 {
26256 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26257 }
26258
26259 /* Return the length of a function name prefix
26260 that starts with the character 'c'. */
26261 static int
26262 arm_get_strip_length (int c)
26263 {
26264 switch (c)
26265 {
26266 ARM_NAME_ENCODING_LENGTHS
26267 default: return 0;
26268 }
26269 }
26270
26271 /* Return a pointer to a function's name with any
26272 and all prefix encodings stripped from it. */
26273 const char *
26274 arm_strip_name_encoding (const char *name)
26275 {
26276 int skip;
26277
26278 while ((skip = arm_get_strip_length (* name)))
26279 name += skip;
26280
26281 return name;
26282 }
26283
26284 /* If there is a '*' anywhere in the name's prefix, then
26285 emit the stripped name verbatim, otherwise prepend an
26286 underscore if leading underscores are being used. */
26287 void
26288 arm_asm_output_labelref (FILE *stream, const char *name)
26289 {
26290 int skip;
26291 int verbatim = 0;
26292
26293 while ((skip = arm_get_strip_length (* name)))
26294 {
26295 verbatim |= (*name == '*');
26296 name += skip;
26297 }
26298
26299 if (verbatim)
26300 fputs (name, stream);
26301 else
26302 asm_fprintf (stream, "%U%s", name);
26303 }
26304
26305 /* This function is used to emit an EABI tag and its associated value.
26306 We emit the numerical value of the tag in case the assembler does not
26307 support textual tags (e.g. gas prior to 2.20). If requested we include
26308 the tag name in a comment so that anyone reading the assembler output
26309 will know which tag is being set.
26310
26311 This function is not static because arm-c.c needs it too. */
26312
26313 void
26314 arm_emit_eabi_attribute (const char *name, int num, int val)
26315 {
26316 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26317 if (flag_verbose_asm || flag_debug_asm)
26318 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26319 asm_fprintf (asm_out_file, "\n");
26320 }
26321
26322 /* This function is used to print CPU tuning information as comment
26323 in assembler file. Pointers are not printed for now. */
26324
26325 void
26326 arm_print_tune_info (void)
26327 {
26328 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26329 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26330 current_tune->constant_limit);
26331 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26332 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26334 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26336 "prefetch.l1_cache_size:\t%d\n",
26337 current_tune->prefetch.l1_cache_size);
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26339 "prefetch.l1_cache_line_size:\t%d\n",
26340 current_tune->prefetch.l1_cache_line_size);
26341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26342 "prefer_constant_pool:\t%d\n",
26343 (int) current_tune->prefer_constant_pool);
26344 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26345 "branch_cost:\t(s:speed, p:predictable)\n");
26346 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26348 current_tune->branch_cost (false, false));
26349 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26350 current_tune->branch_cost (false, true));
26351 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26352 current_tune->branch_cost (true, false));
26353 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26354 current_tune->branch_cost (true, true));
26355 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26356 "prefer_ldrd_strd:\t%d\n",
26357 (int) current_tune->prefer_ldrd_strd);
26358 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26359 "logical_op_non_short_circuit:\t[%d,%d]\n",
26360 (int) current_tune->logical_op_non_short_circuit_thumb,
26361 (int) current_tune->logical_op_non_short_circuit_arm);
26362 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26363 "prefer_neon_for_64bits:\t%d\n",
26364 (int) current_tune->prefer_neon_for_64bits);
26365 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26366 "disparage_flag_setting_t16_encodings:\t%d\n",
26367 (int) current_tune->disparage_flag_setting_t16_encodings);
26368 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26369 "string_ops_prefer_neon:\t%d\n",
26370 (int) current_tune->string_ops_prefer_neon);
26371 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26372 "max_insns_inline_memset:\t%d\n",
26373 current_tune->max_insns_inline_memset);
26374 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26375 current_tune->fusible_ops);
26376 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26377 (int) current_tune->sched_autopref);
26378 }
26379
26380 /* Print .arch and .arch_extension directives corresponding to the
26381 current architecture configuration. */
26382 static void
26383 arm_print_asm_arch_directives ()
26384 {
26385 const arch_option *arch
26386 = arm_parse_arch_option_name (all_architectures, "-march",
26387 arm_active_target.arch_name);
26388 auto_sbitmap opt_bits (isa_num_bits);
26389
26390 gcc_assert (arch);
26391
26392 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26393 if (!arch->common.extensions)
26394 return;
26395
26396 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26397 opt->name != NULL;
26398 opt++)
26399 {
26400 if (!opt->remove)
26401 {
26402 arm_initialize_isa (opt_bits, opt->isa_bits);
26403
26404 /* If every feature bit of this option is set in the target
26405 ISA specification, print out the option name. However,
26406 don't print anything if all the bits are part of the
26407 FPU specification. */
26408 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26409 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26410 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26411 }
26412 }
26413 }
26414
26415 static void
26416 arm_file_start (void)
26417 {
26418 int val;
26419
26420 if (TARGET_BPABI)
26421 {
26422 /* We don't have a specified CPU. Use the architecture to
26423 generate the tags.
26424
26425 Note: it might be better to do this unconditionally, then the
26426 assembler would not need to know about all new CPU names as
26427 they are added. */
26428 if (!arm_active_target.core_name)
26429 {
26430 /* armv7ve doesn't support any extensions. */
26431 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26432 {
26433 /* Keep backward compatibility for assemblers
26434 which don't support armv7ve. */
26435 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26436 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26437 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26438 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26439 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26440 }
26441 else
26442 arm_print_asm_arch_directives ();
26443 }
26444 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26445 asm_fprintf (asm_out_file, "\t.arch %s\n",
26446 arm_active_target.core_name + 8);
26447 else
26448 {
26449 const char* truncated_name
26450 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26451 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26452 }
26453
26454 if (print_tune_info)
26455 arm_print_tune_info ();
26456
26457 if (! TARGET_SOFT_FLOAT)
26458 {
26459 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26460 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26461
26462 if (TARGET_HARD_FLOAT_ABI)
26463 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26464 }
26465
26466 /* Some of these attributes only apply when the corresponding features
26467 are used. However we don't have any easy way of figuring this out.
26468 Conservatively record the setting that would have been used. */
26469
26470 if (flag_rounding_math)
26471 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26472
26473 if (!flag_unsafe_math_optimizations)
26474 {
26475 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26476 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26477 }
26478 if (flag_signaling_nans)
26479 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26480
26481 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26482 flag_finite_math_only ? 1 : 3);
26483
26484 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26485 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26486 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26487 flag_short_enums ? 1 : 2);
26488
26489 /* Tag_ABI_optimization_goals. */
26490 if (optimize_size)
26491 val = 4;
26492 else if (optimize >= 2)
26493 val = 2;
26494 else if (optimize)
26495 val = 1;
26496 else
26497 val = 6;
26498 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26499
26500 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26501 unaligned_access);
26502
26503 if (arm_fp16_format)
26504 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26505 (int) arm_fp16_format);
26506
26507 if (arm_lang_output_object_attributes_hook)
26508 arm_lang_output_object_attributes_hook();
26509 }
26510
26511 default_file_start ();
26512 }
26513
26514 static void
26515 arm_file_end (void)
26516 {
26517 int regno;
26518
26519 if (NEED_INDICATE_EXEC_STACK)
26520 /* Add .note.GNU-stack. */
26521 file_end_indicate_exec_stack ();
26522
26523 if (! thumb_call_reg_needed)
26524 return;
26525
26526 switch_to_section (text_section);
26527 asm_fprintf (asm_out_file, "\t.code 16\n");
26528 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26529
26530 for (regno = 0; regno < LR_REGNUM; regno++)
26531 {
26532 rtx label = thumb_call_via_label[regno];
26533
26534 if (label != 0)
26535 {
26536 targetm.asm_out.internal_label (asm_out_file, "L",
26537 CODE_LABEL_NUMBER (label));
26538 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26539 }
26540 }
26541 }
26542
26543 #ifndef ARM_PE
26544 /* Symbols in the text segment can be accessed without indirecting via the
26545 constant pool; it may take an extra binary operation, but this is still
26546 faster than indirecting via memory. Don't do this when not optimizing,
26547 since we won't be calculating al of the offsets necessary to do this
26548 simplification. */
26549
26550 static void
26551 arm_encode_section_info (tree decl, rtx rtl, int first)
26552 {
26553 if (optimize > 0 && TREE_CONSTANT (decl))
26554 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26555
26556 default_encode_section_info (decl, rtl, first);
26557 }
26558 #endif /* !ARM_PE */
26559
26560 static void
26561 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26562 {
26563 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26564 && !strcmp (prefix, "L"))
26565 {
26566 arm_ccfsm_state = 0;
26567 arm_target_insn = NULL;
26568 }
26569 default_internal_label (stream, prefix, labelno);
26570 }
26571
26572 /* Output code to add DELTA to the first argument, and then jump
26573 to FUNCTION. Used for C++ multiple inheritance. */
26574
26575 static void
26576 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26577 HOST_WIDE_INT, tree function)
26578 {
26579 static int thunk_label = 0;
26580 char label[256];
26581 char labelpc[256];
26582 int mi_delta = delta;
26583 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26584 int shift = 0;
26585 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26586 ? 1 : 0);
26587 if (mi_delta < 0)
26588 mi_delta = - mi_delta;
26589
26590 final_start_function (emit_barrier (), file, 1);
26591
26592 if (TARGET_THUMB1)
26593 {
26594 int labelno = thunk_label++;
26595 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26596 /* Thunks are entered in ARM mode when available. */
26597 if (TARGET_THUMB1_ONLY)
26598 {
26599 /* push r3 so we can use it as a temporary. */
26600 /* TODO: Omit this save if r3 is not used. */
26601 fputs ("\tpush {r3}\n", file);
26602 fputs ("\tldr\tr3, ", file);
26603 }
26604 else
26605 {
26606 fputs ("\tldr\tr12, ", file);
26607 }
26608 assemble_name (file, label);
26609 fputc ('\n', file);
26610 if (flag_pic)
26611 {
26612 /* If we are generating PIC, the ldr instruction below loads
26613 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26614 the address of the add + 8, so we have:
26615
26616 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26617 = target + 1.
26618
26619 Note that we have "+ 1" because some versions of GNU ld
26620 don't set the low bit of the result for R_ARM_REL32
26621 relocations against thumb function symbols.
26622 On ARMv6M this is +4, not +8. */
26623 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26624 assemble_name (file, labelpc);
26625 fputs (":\n", file);
26626 if (TARGET_THUMB1_ONLY)
26627 {
26628 /* This is 2 insns after the start of the thunk, so we know it
26629 is 4-byte aligned. */
26630 fputs ("\tadd\tr3, pc, r3\n", file);
26631 fputs ("\tmov r12, r3\n", file);
26632 }
26633 else
26634 fputs ("\tadd\tr12, pc, r12\n", file);
26635 }
26636 else if (TARGET_THUMB1_ONLY)
26637 fputs ("\tmov r12, r3\n", file);
26638 }
26639 if (TARGET_THUMB1_ONLY)
26640 {
26641 if (mi_delta > 255)
26642 {
26643 fputs ("\tldr\tr3, ", file);
26644 assemble_name (file, label);
26645 fputs ("+4\n", file);
26646 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26647 mi_op, this_regno, this_regno);
26648 }
26649 else if (mi_delta != 0)
26650 {
26651 /* Thumb1 unified syntax requires s suffix in instruction name when
26652 one of the operands is immediate. */
26653 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26654 mi_op, this_regno, this_regno,
26655 mi_delta);
26656 }
26657 }
26658 else
26659 {
26660 /* TODO: Use movw/movt for large constants when available. */
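/* Emit the delta as a sequence of add/sub instructions whose immediates
   are 8-bit chunks at even bit positions, matching the ARM data-processing
   immediate encoding (an 8-bit value rotated by an even amount).  */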
26661 while (mi_delta != 0)
26662 {
26663 if ((mi_delta & (3 << shift)) == 0)
26664 shift += 2;
26665 else
26666 {
26667 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26668 mi_op, this_regno, this_regno,
26669 mi_delta & (0xff << shift));
26670 mi_delta &= ~(0xff << shift);
26671 shift += 8;
26672 }
26673 }
26674 }
26675 if (TARGET_THUMB1)
26676 {
26677 if (TARGET_THUMB1_ONLY)
26678 fputs ("\tpop\t{r3}\n", file);
26679
26680 fprintf (file, "\tbx\tr12\n");
26681 ASM_OUTPUT_ALIGN (file, 2);
26682 assemble_name (file, label);
26683 fputs (":\n", file);
26684 if (flag_pic)
26685 {
26686 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26687 rtx tem = XEXP (DECL_RTL (function), 0);
26688 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26689 pipeline offset is four rather than eight. Adjust the offset
26690 accordingly. */
26691 tem = plus_constant (GET_MODE (tem), tem,
26692 TARGET_THUMB1_ONLY ? -3 : -7);
26693 tem = gen_rtx_MINUS (GET_MODE (tem),
26694 tem,
26695 gen_rtx_SYMBOL_REF (Pmode,
26696 ggc_strdup (labelpc)));
26697 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26698 }
26699 else
26700 /* Output ".word .LTHUNKn". */
26701 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26702
26703 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26704 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26705 }
26706 else
26707 {
26708 fputs ("\tb\t", file);
26709 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26710 if (NEED_PLT_RELOC)
26711 fputs ("(PLT)", file);
26712 fputc ('\n', file);
26713 }
26714
26715 final_end_function ();
26716 }
26717
26718 /* MI thunk handling for TARGET_32BIT. */
26719
26720 static void
26721 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26722 HOST_WIDE_INT vcall_offset, tree function)
26723 {
26724 /* On ARM, this_regno is R0 or R1 depending on
26725 whether the function returns an aggregate or not.
26726 */
26727 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26728 function)
26729 ? R1_REGNUM : R0_REGNUM);
26730
26731 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26732 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26733 reload_completed = 1;
26734 emit_note (NOTE_INSN_PROLOGUE_END);
26735
26736 /* Add DELTA to THIS_RTX. */
26737 if (delta != 0)
26738 arm_split_constant (PLUS, Pmode, NULL_RTX,
26739 delta, this_rtx, this_rtx, false);
26740
26741 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26742 if (vcall_offset != 0)
26743 {
26744 /* Load *THIS_RTX. */
26745 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26746 /* Compute *THIS_RTX + VCALL_OFFSET. */
26747 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26748 false);
26749 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26750 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26751 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26752 }
26753
26754 /* Generate a tail call to the target function. */
26755 if (!TREE_USED (function))
26756 {
26757 assemble_external (function);
26758 TREE_USED (function) = 1;
26759 }
26760 rtx funexp = XEXP (DECL_RTL (function), 0);
26761 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26762 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26763 SIBLING_CALL_P (insn) = 1;
26764
26765 insn = get_insns ();
26766 shorten_branches (insn);
26767 final_start_function (insn, file, 1);
26768 final (insn, file, 1);
26769 final_end_function ();
26770
26771 /* Stop pretending this is a post-reload pass. */
26772 reload_completed = 0;
26773 }
26774
26775 /* Output code to add DELTA to the first argument, and then jump
26776 to FUNCTION. Used for C++ multiple inheritance. */
26777
26778 static void
26779 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26780 HOST_WIDE_INT vcall_offset, tree function)
26781 {
26782 if (TARGET_32BIT)
26783 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26784 else
26785 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26786 }
26787
26788 int
26789 arm_emit_vector_const (FILE *file, rtx x)
26790 {
26791 int i;
26792 const char * pattern;
26793
26794 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26795
26796 switch (GET_MODE (x))
26797 {
26798 case E_V2SImode: pattern = "%08x"; break;
26799 case E_V4HImode: pattern = "%04x"; break;
26800 case E_V8QImode: pattern = "%02x"; break;
26801 default: gcc_unreachable ();
26802 }
26803
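/* Emit the vector as a single hexadecimal literal, most significant
   element first, using a field width that matches the element size.  */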
26804 fprintf (file, "0x");
26805 for (i = CONST_VECTOR_NUNITS (x); i--;)
26806 {
26807 rtx element;
26808
26809 element = CONST_VECTOR_ELT (x, i);
26810 fprintf (file, pattern, INTVAL (element));
26811 }
26812
26813 return 1;
26814 }
26815
26816 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26817 HFmode constant pool entries are actually loaded with ldr. */
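/* For instance, the HFmode constant 1.0 (bit pattern 0x3c00) would be
   emitted as the 2-byte value 0x3c00 followed by 2 bytes of zero padding
   on a little-endian word order, and as the padding followed by the
   value when WORDS_BIG_ENDIAN. */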
26818 void
26819 arm_emit_fp16_const (rtx c)
26820 {
26821 long bits;
26822
26823 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26824 if (WORDS_BIG_ENDIAN)
26825 assemble_zeros (2);
26826 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26827 if (!WORDS_BIG_ENDIAN)
26828 assemble_zeros (2);
26829 }
26830
26831 const char *
26832 arm_output_load_gr (rtx *operands)
26833 {
26834 rtx reg;
26835 rtx offset;
26836 rtx wcgr;
26837 rtx sum;
26838
26839 if (!MEM_P (operands [1])
26840 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26841 || !REG_P (reg = XEXP (sum, 0))
26842 || !CONST_INT_P (offset = XEXP (sum, 1))
26843 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26844 return "wldrw%?\t%0, %1";
26845
26846 /* Fix up an out-of-range load of a GR register. */
26847 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26848 wcgr = operands[0];
26849 operands[0] = reg;
26850 output_asm_insn ("ldr%?\t%0, %1", operands);
26851
26852 operands[0] = wcgr;
26853 operands[1] = reg;
26854 output_asm_insn ("tmcr%?\t%0, %1", operands);
26855 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26856
26857 return "";
26858 }
26859
26860 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26861
26862 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26863 named arg and all anonymous args onto the stack.
26864 XXX I know the prologue shouldn't be pushing registers, but it is faster
26865 that way. */
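/* As a rough example: for a variadic function whose named arguments use
   r0 and r1, nregs is 2, so *pretend_size becomes
   (NUM_ARG_REGS - 2) * UNITS_PER_WORD == 8 bytes on ARM (r0-r3 being the
   argument registers) and the prologue pushes r2 and r3. */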
26866
26867 static void
26868 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26869 machine_mode mode,
26870 tree type,
26871 int *pretend_size,
26872 int second_time ATTRIBUTE_UNUSED)
26873 {
26874 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26875 int nregs;
26876
26877 cfun->machine->uses_anonymous_args = 1;
26878 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26879 {
26880 nregs = pcum->aapcs_ncrn;
26881 if (nregs & 1)
26882 {
26883 int res = arm_needs_doubleword_align (mode, type);
26884 if (res < 0 && warn_psabi)
26885 inform (input_location, "parameter passing for argument of "
26886 "type %qT changed in GCC 7.1", type);
26887 else if (res > 0)
26888 nregs++;
26889 }
26890 }
26891 else
26892 nregs = pcum->nregs;
26893
26894 if (nregs < NUM_ARG_REGS)
26895 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26896 }
26897
26898 /* We can't rely on the caller doing the proper promotion when
26899 using APCS or ATPCS. */
26900
26901 static bool
26902 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26903 {
26904 return !TARGET_AAPCS_BASED;
26905 }
26906
26907 static machine_mode
26908 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26909 machine_mode mode,
26910 int *punsignedp ATTRIBUTE_UNUSED,
26911 const_tree fntype ATTRIBUTE_UNUSED,
26912 int for_return ATTRIBUTE_UNUSED)
26913 {
26914 if (GET_MODE_CLASS (mode) == MODE_INT
26915 && GET_MODE_SIZE (mode) < 4)
26916 return SImode;
26917
26918 return mode;
26919 }
26920
26921
26922 static bool
26923 arm_default_short_enums (void)
26924 {
26925 return ARM_DEFAULT_SHORT_ENUMS;
26926 }
26927
26928
26929 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26930
26931 static bool
26932 arm_align_anon_bitfield (void)
26933 {
26934 return TARGET_AAPCS_BASED;
26935 }
26936
26937
26938 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26939
26940 static tree
26941 arm_cxx_guard_type (void)
26942 {
26943 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26944 }
26945
26946
26947 /* The EABI says test the least significant bit of a guard variable. */
26948
26949 static bool
26950 arm_cxx_guard_mask_bit (void)
26951 {
26952 return TARGET_AAPCS_BASED;
26953 }
26954
26955
26956 /* The EABI specifies that all array cookies are 8 bytes long. */
26957
26958 static tree
26959 arm_get_cookie_size (tree type)
26960 {
26961 tree size;
26962
26963 if (!TARGET_AAPCS_BASED)
26964 return default_cxx_get_cookie_size (type);
26965
26966 size = build_int_cst (sizetype, 8);
26967 return size;
26968 }
26969
26970
26971 /* The EABI says that array cookies should also contain the element size. */
26972
26973 static bool
26974 arm_cookie_has_size (void)
26975 {
26976 return TARGET_AAPCS_BASED;
26977 }
26978
26979
26980 /* The EABI says constructors and destructors should return a pointer to
26981 the object constructed/destroyed. */
26982
26983 static bool
26984 arm_cxx_cdtor_returns_this (void)
26985 {
26986 return TARGET_AAPCS_BASED;
26987 }
26988
26989 /* The EABI says that an inline function may never be the key
26990 method. */
26991
26992 static bool
26993 arm_cxx_key_method_may_be_inline (void)
26994 {
26995 return !TARGET_AAPCS_BASED;
26996 }
26997
26998 static void
26999 arm_cxx_determine_class_data_visibility (tree decl)
27000 {
27001 if (!TARGET_AAPCS_BASED
27002 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27003 return;
27004
27005 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27006 is exported. However, on systems without dynamic vague linkage,
27007 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27008 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27009 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27010 else
27011 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27012 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27013 }
27014
27015 static bool
27016 arm_cxx_class_data_always_comdat (void)
27017 {
27018 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27019 vague linkage if the class has no key function. */
27020 return !TARGET_AAPCS_BASED;
27021 }
27022
27023
27024 /* The EABI says __aeabi_atexit should be used to register static
27025 destructors. */
27026
27027 static bool
27028 arm_cxx_use_aeabi_atexit (void)
27029 {
27030 return TARGET_AAPCS_BASED;
27031 }
27032
27033
27034 void
27035 arm_set_return_address (rtx source, rtx scratch)
27036 {
27037 arm_stack_offsets *offsets;
27038 HOST_WIDE_INT delta;
27039 rtx addr, mem;
27040 unsigned long saved_regs;
27041
27042 offsets = arm_get_frame_offsets ();
27043 saved_regs = offsets->saved_regs_mask;
27044
27045 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27046 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27047 else
27048 {
27049 if (frame_pointer_needed)
27050 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27051 else
27052 {
27053 /* LR will be the first saved register. */
27054 delta = offsets->outgoing_args - (offsets->frame + 4);
27055
27056
27057 if (delta >= 4096)
27058 {
27059 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27060 GEN_INT (delta & ~4095)));
27061 addr = scratch;
27062 delta &= 4095;
27063 }
27064 else
27065 addr = stack_pointer_rtx;
27066
27067 addr = plus_constant (Pmode, addr, delta);
27068 }
27069
27070 /* The store needs to be marked to prevent DSE from deleting
27071 it as dead if it is based on fp. */
27072 mem = gen_frame_mem (Pmode, addr);
27073 MEM_VOLATILE_P (mem) = true;
27074 emit_move_insn (mem, source);
27075 }
27076 }
27077
27078
27079 void
27080 thumb_set_return_address (rtx source, rtx scratch)
27081 {
27082 arm_stack_offsets *offsets;
27083 HOST_WIDE_INT delta;
27084 HOST_WIDE_INT limit;
27085 int reg;
27086 rtx addr, mem;
27087 unsigned long mask;
27088
27089 emit_use (source);
27090
27091 offsets = arm_get_frame_offsets ();
27092 mask = offsets->saved_regs_mask;
27093 if (mask & (1 << LR_REGNUM))
27094 {
27095 limit = 1024;
27096 /* Find the saved regs. */
27097 if (frame_pointer_needed)
27098 {
27099 delta = offsets->soft_frame - offsets->saved_args;
27100 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27101 if (TARGET_THUMB1)
27102 limit = 128;
27103 }
27104 else
27105 {
27106 delta = offsets->outgoing_args - offsets->saved_args;
27107 reg = SP_REGNUM;
27108 }
27109 /* Allow for the stack frame. */
27110 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27111 delta -= 16;
27112 /* The link register is always the first saved register. */
27113 delta -= 4;
27114
27115 /* Construct the address. */
27116 addr = gen_rtx_REG (SImode, reg);
27117 if (delta > limit)
27118 {
27119 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27120 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27121 addr = scratch;
27122 }
27123 else
27124 addr = plus_constant (Pmode, addr, delta);
27125
27126 /* The store needs to be marked to prevent DSE from deleting
27127 it as dead if it is based on fp. */
27128 mem = gen_frame_mem (Pmode, addr);
27129 MEM_VOLATILE_P (mem) = true;
27130 emit_move_insn (mem, source);
27131 }
27132 else
27133 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27134 }
27135
27136 /* Implements target hook vector_mode_supported_p. */
27137 bool
27138 arm_vector_mode_supported_p (machine_mode mode)
27139 {
27140 /* Neon also supports V2SImode, etc. listed in the clause below. */
27141 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27142 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27143 || mode == V2DImode || mode == V8HFmode))
27144 return true;
27145
27146 if ((TARGET_NEON || TARGET_IWMMXT)
27147 && ((mode == V2SImode)
27148 || (mode == V4HImode)
27149 || (mode == V8QImode)))
27150 return true;
27151
27152 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27153 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27154 || mode == V2HAmode))
27155 return true;
27156
27157 return false;
27158 }
27159
27160 /* Implements target hook array_mode_supported_p. */
27161
27162 static bool
27163 arm_array_mode_supported_p (machine_mode mode,
27164 unsigned HOST_WIDE_INT nelems)
27165 {
27166 if (TARGET_NEON
27167 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27168 && (nelems >= 2 && nelems <= 4))
27169 return true;
27170
27171 return false;
27172 }
27173
27174 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27175 registers when autovectorizing for Neon, at least until multiple vector
27176 widths are supported properly by the middle-end. */
27177
27178 static machine_mode
27179 arm_preferred_simd_mode (scalar_mode mode)
27180 {
27181 if (TARGET_NEON)
27182 switch (mode)
27183 {
27184 case E_SFmode:
27185 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27186 case E_SImode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27188 case E_HImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27190 case E_QImode:
27191 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27192 case E_DImode:
27193 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27194 return V2DImode;
27195 break;
27196
27197 default:;
27198 }
27199
27200 if (TARGET_REALLY_IWMMXT)
27201 switch (mode)
27202 {
27203 case E_SImode:
27204 return V2SImode;
27205 case E_HImode:
27206 return V4HImode;
27207 case E_QImode:
27208 return V8QImode;
27209
27210 default:;
27211 }
27212
27213 return word_mode;
27214 }
27215
27216 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27217
27218 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27219 using r0-r4 for function arguments and r7 for the stack frame, leaving too
27220 little left over to do doubleword arithmetic. For Thumb-2 all the
27221 potentially problematic instructions accept high registers so this is not
27222 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27223 that require many low registers. */
27224 static bool
27225 arm_class_likely_spilled_p (reg_class_t rclass)
27226 {
27227 if ((TARGET_THUMB1 && rclass == LO_REGS)
27228 || rclass == CC_REG)
27229 return true;
27230
27231 return false;
27232 }
27233
27234 /* Implements target hook small_register_classes_for_mode_p. */
27235 bool
27236 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27237 {
27238 return TARGET_THUMB1;
27239 }
27240
27241 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27242 ARM insns and therefore guarantee that the shift count is modulo 256.
27243 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27244 guarantee no particular behavior for out-of-range counts. */
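/* As a concrete illustration: with the SImode mask of 255, a variable
   shift count of 257 behaves like a shift by 257 & 255 == 1, whereas the
   zero mask for DImode tells the middle-end to assume nothing about
   out-of-range counts. */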
27245
27246 static unsigned HOST_WIDE_INT
27247 arm_shift_truncation_mask (machine_mode mode)
27248 {
27249 return mode == SImode ? 255 : 0;
27250 }
27251
27252
27253 /* Map internal gcc register numbers to DWARF2 register numbers. */
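/* A few illustrative mappings: core register r11 stays 11, s0 becomes
   DWARF register 64, d16 becomes 256 + 16 == 272, and anything we cannot
   describe falls back to DWARF_FRAME_REGISTERS. */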
27254
27255 unsigned int
27256 arm_dbx_register_number (unsigned int regno)
27257 {
27258 if (regno < 16)
27259 return regno;
27260
27261 if (IS_VFP_REGNUM (regno))
27262 {
27263 /* See comment in arm_dwarf_register_span. */
27264 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27265 return 64 + regno - FIRST_VFP_REGNUM;
27266 else
27267 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27268 }
27269
27270 if (IS_IWMMXT_GR_REGNUM (regno))
27271 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27272
27273 if (IS_IWMMXT_REGNUM (regno))
27274 return 112 + regno - FIRST_IWMMXT_REGNUM;
27275
27276 return DWARF_FRAME_REGISTERS;
27277 }
27278
27279 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27280 GCC models them as 64 32-bit registers, so we need to describe this to
27281 the DWARF generation code. Other registers can use the default. */
27282 static rtx
27283 arm_dwarf_register_span (rtx rtl)
27284 {
27285 machine_mode mode;
27286 unsigned regno;
27287 rtx parts[16];
27288 int nregs;
27289 int i;
27290
27291 regno = REGNO (rtl);
27292 if (!IS_VFP_REGNUM (regno))
27293 return NULL_RTX;
27294
27295 /* XXX FIXME: The EABI defines two VFP register ranges:
27296 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27297 256-287: D0-D31
27298 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27299 corresponding D register. Until GDB supports this, we shall use the
27300 legacy encodings. We also use these encodings for D0-D15 for
27301 compatibility with older debuggers. */
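/* Roughly: a DFmode value in d5 is modelled by GCC as the pair s10/s11,
   so the span built below describes it as two SImode pieces, which
   arm_dbx_register_number then maps to DWARF registers 74 and 75. */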
27302 mode = GET_MODE (rtl);
27303 if (GET_MODE_SIZE (mode) < 8)
27304 return NULL_RTX;
27305
27306 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27307 {
27308 nregs = GET_MODE_SIZE (mode) / 4;
27309 for (i = 0; i < nregs; i += 2)
27310 if (TARGET_BIG_END)
27311 {
27312 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27313 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27314 }
27315 else
27316 {
27317 parts[i] = gen_rtx_REG (SImode, regno + i);
27318 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27319 }
27320 }
27321 else
27322 {
27323 nregs = GET_MODE_SIZE (mode) / 8;
27324 for (i = 0; i < nregs; i++)
27325 parts[i] = gen_rtx_REG (DImode, regno + i);
27326 }
27327
27328 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27329 }
27330
27331 #if ARM_UNWIND_INFO
27332 /* Emit unwind directives for a store-multiple instruction or stack pointer
27333 push during alignment.
27334 These should only ever be generated by the function prologue code, so
27335 expect them to have a particular form.
27336 The store-multiple instruction sometimes pushes pc as the last register,
27337 although it should not be tracked into unwind information, or for -Os
27338 sometimes pushes some dummy registers before the first register that needs
27339 to be tracked in unwind information; such dummy registers are there just
27340 to avoid separate stack adjustment, and will not be restored in the
27341 epilogue. */
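/* For example, a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}", while a "vpush {d8, d9}" becomes
   ".vsave {d8, d9}". */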
27342
27343 static void
27344 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27345 {
27346 int i;
27347 HOST_WIDE_INT offset;
27348 HOST_WIDE_INT nregs;
27349 int reg_size;
27350 unsigned reg;
27351 unsigned lastreg;
27352 unsigned padfirst = 0, padlast = 0;
27353 rtx e;
27354
27355 e = XVECEXP (p, 0, 0);
27356 gcc_assert (GET_CODE (e) == SET);
27357
27358 /* First insn will adjust the stack pointer. */
27359 gcc_assert (GET_CODE (e) == SET
27360 && REG_P (SET_DEST (e))
27361 && REGNO (SET_DEST (e)) == SP_REGNUM
27362 && GET_CODE (SET_SRC (e)) == PLUS);
27363
27364 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27365 nregs = XVECLEN (p, 0) - 1;
27366 gcc_assert (nregs);
27367
27368 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27369 if (reg < 16)
27370 {
27371 /* For -Os dummy registers can be pushed at the beginning to
27372 avoid separate stack pointer adjustment. */
27373 e = XVECEXP (p, 0, 1);
27374 e = XEXP (SET_DEST (e), 0);
27375 if (GET_CODE (e) == PLUS)
27376 padfirst = INTVAL (XEXP (e, 1));
27377 gcc_assert (padfirst == 0 || optimize_size);
27378 /* The function prologue may also push pc, but not annotate it as it is
27379 never restored. We turn this into a stack pointer adjustment. */
27380 e = XVECEXP (p, 0, nregs);
27381 e = XEXP (SET_DEST (e), 0);
27382 if (GET_CODE (e) == PLUS)
27383 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27384 else
27385 padlast = offset - 4;
27386 gcc_assert (padlast == 0 || padlast == 4);
27387 if (padlast == 4)
27388 fprintf (asm_out_file, "\t.pad #4\n");
27389 reg_size = 4;
27390 fprintf (asm_out_file, "\t.save {");
27391 }
27392 else if (IS_VFP_REGNUM (reg))
27393 {
27394 reg_size = 8;
27395 fprintf (asm_out_file, "\t.vsave {");
27396 }
27397 else
27398 /* Unknown register type. */
27399 gcc_unreachable ();
27400
27401 /* If the stack increment doesn't match the size of the saved registers,
27402 something has gone horribly wrong. */
27403 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27404
27405 offset = padfirst;
27406 lastreg = 0;
27407 /* The remaining insns will describe the stores. */
27408 for (i = 1; i <= nregs; i++)
27409 {
27410 /* Expect (set (mem <addr>) (reg)).
27411 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27412 e = XVECEXP (p, 0, i);
27413 gcc_assert (GET_CODE (e) == SET
27414 && MEM_P (SET_DEST (e))
27415 && REG_P (SET_SRC (e)));
27416
27417 reg = REGNO (SET_SRC (e));
27418 gcc_assert (reg >= lastreg);
27419
27420 if (i != 1)
27421 fprintf (asm_out_file, ", ");
27422 /* We can't use %r for vfp because we need to use the
27423 double precision register names. */
27424 if (IS_VFP_REGNUM (reg))
27425 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27426 else
27427 asm_fprintf (asm_out_file, "%r", reg);
27428
27429 if (flag_checking)
27430 {
27431 /* Check that the addresses are consecutive. */
27432 e = XEXP (SET_DEST (e), 0);
27433 if (GET_CODE (e) == PLUS)
27434 gcc_assert (REG_P (XEXP (e, 0))
27435 && REGNO (XEXP (e, 0)) == SP_REGNUM
27436 && CONST_INT_P (XEXP (e, 1))
27437 && offset == INTVAL (XEXP (e, 1)));
27438 else
27439 gcc_assert (i == 1
27440 && REG_P (e)
27441 && REGNO (e) == SP_REGNUM);
27442 offset += reg_size;
27443 }
27444 }
27445 fprintf (asm_out_file, "}\n");
27446 if (padfirst)
27447 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27448 }
27449
27450 /* Emit unwind directives for a SET. */
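/* For instance, a single-register push such as "push {r4}" results in
   ".save {r4}", and a stack adjustment "sub sp, sp, #16" results in
   ".pad #16". */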
27451
27452 static void
27453 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27454 {
27455 rtx e0;
27456 rtx e1;
27457 unsigned reg;
27458
27459 e0 = XEXP (p, 0);
27460 e1 = XEXP (p, 1);
27461 switch (GET_CODE (e0))
27462 {
27463 case MEM:
27464 /* Pushing a single register. */
27465 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27466 || !REG_P (XEXP (XEXP (e0, 0), 0))
27467 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27468 abort ();
27469
27470 asm_fprintf (asm_out_file, "\t.save ");
27471 if (IS_VFP_REGNUM (REGNO (e1)))
27472 asm_fprintf(asm_out_file, "{d%d}\n",
27473 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27474 else
27475 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27476 break;
27477
27478 case REG:
27479 if (REGNO (e0) == SP_REGNUM)
27480 {
27481 /* A stack increment. */
27482 if (GET_CODE (e1) != PLUS
27483 || !REG_P (XEXP (e1, 0))
27484 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27485 || !CONST_INT_P (XEXP (e1, 1)))
27486 abort ();
27487
27488 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27489 -INTVAL (XEXP (e1, 1)));
27490 }
27491 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27492 {
27493 HOST_WIDE_INT offset;
27494
27495 if (GET_CODE (e1) == PLUS)
27496 {
27497 if (!REG_P (XEXP (e1, 0))
27498 || !CONST_INT_P (XEXP (e1, 1)))
27499 abort ();
27500 reg = REGNO (XEXP (e1, 0));
27501 offset = INTVAL (XEXP (e1, 1));
27502 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27503 HARD_FRAME_POINTER_REGNUM, reg,
27504 offset);
27505 }
27506 else if (REG_P (e1))
27507 {
27508 reg = REGNO (e1);
27509 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27510 HARD_FRAME_POINTER_REGNUM, reg);
27511 }
27512 else
27513 abort ();
27514 }
27515 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27516 {
27517 /* Move from sp to reg. */
27518 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27519 }
27520 else if (GET_CODE (e1) == PLUS
27521 && REG_P (XEXP (e1, 0))
27522 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27523 && CONST_INT_P (XEXP (e1, 1)))
27524 {
27525 /* Set reg to offset from sp. */
27526 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27527 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27528 }
27529 else
27530 abort ();
27531 break;
27532
27533 default:
27534 abort ();
27535 }
27536 }
27537
27538
27539 /* Emit unwind directives for the given insn. */
27540
27541 static void
27542 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27543 {
27544 rtx note, pat;
27545 bool handled_one = false;
27546
27547 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27548 return;
27549
27550 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27551 && (TREE_NOTHROW (current_function_decl)
27552 || crtl->all_throwers_are_sibcalls))
27553 return;
27554
27555 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27556 return;
27557
27558 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27559 {
27560 switch (REG_NOTE_KIND (note))
27561 {
27562 case REG_FRAME_RELATED_EXPR:
27563 pat = XEXP (note, 0);
27564 goto found;
27565
27566 case REG_CFA_REGISTER:
27567 pat = XEXP (note, 0);
27568 if (pat == NULL)
27569 {
27570 pat = PATTERN (insn);
27571 if (GET_CODE (pat) == PARALLEL)
27572 pat = XVECEXP (pat, 0, 0);
27573 }
27574
27575 /* Only emitted for IS_STACKALIGN re-alignment. */
27576 {
27577 rtx dest, src;
27578 unsigned reg;
27579
27580 src = SET_SRC (pat);
27581 dest = SET_DEST (pat);
27582
27583 gcc_assert (src == stack_pointer_rtx);
27584 reg = REGNO (dest);
27585 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27586 reg + 0x90, reg);
27587 }
27588 handled_one = true;
27589 break;
27590
27591 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27592 to get correct DWARF information for shrink-wrapping. We should not
27593 emit unwind information for it because such notes are used either for
27594 pretend arguments or to adjust sp and restore registers from the
27595 stack. */
27596 case REG_CFA_DEF_CFA:
27597 case REG_CFA_ADJUST_CFA:
27598 case REG_CFA_RESTORE:
27599 return;
27600
27601 case REG_CFA_EXPRESSION:
27602 case REG_CFA_OFFSET:
27603 /* ??? Only handling here what we actually emit. */
27604 gcc_unreachable ();
27605
27606 default:
27607 break;
27608 }
27609 }
27610 if (handled_one)
27611 return;
27612 pat = PATTERN (insn);
27613 found:
27614
27615 switch (GET_CODE (pat))
27616 {
27617 case SET:
27618 arm_unwind_emit_set (asm_out_file, pat);
27619 break;
27620
27621 case SEQUENCE:
27622 /* Store multiple. */
27623 arm_unwind_emit_sequence (asm_out_file, pat);
27624 break;
27625
27626 default:
27627 abort();
27628 }
27629 }
27630
27631
27632 /* Output a reference from a function exception table to the type_info
27633 object X. The EABI specifies that the symbol should be relocated by
27634 an R_ARM_TARGET2 relocation. */
27635
27636 static bool
27637 arm_output_ttype (rtx x)
27638 {
27639 fputs ("\t.word\t", asm_out_file);
27640 output_addr_const (asm_out_file, x);
27641 /* Use special relocations for symbol references. */
27642 if (!CONST_INT_P (x))
27643 fputs ("(TARGET2)", asm_out_file);
27644 fputc ('\n', asm_out_file);
27645
27646 return TRUE;
27647 }
27648
27649 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27650
27651 static void
27652 arm_asm_emit_except_personality (rtx personality)
27653 {
27654 fputs ("\t.personality\t", asm_out_file);
27655 output_addr_const (asm_out_file, personality);
27656 fputc ('\n', asm_out_file);
27657 }
27658 #endif /* ARM_UNWIND_INFO */
27659
27660 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27661
27662 static void
27663 arm_asm_init_sections (void)
27664 {
27665 #if ARM_UNWIND_INFO
27666 exception_section = get_unnamed_section (0, output_section_asm_op,
27667 "\t.handlerdata");
27668 #endif /* ARM_UNWIND_INFO */
27669
27670 #ifdef OBJECT_FORMAT_ELF
27671 if (target_pure_code)
27672 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27673 #endif
27674 }
27675
27676 /* Output unwind directives for the start/end of a function. */
27677
27678 void
27679 arm_output_fn_unwind (FILE * f, bool prologue)
27680 {
27681 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27682 return;
27683
27684 if (prologue)
27685 fputs ("\t.fnstart\n", f);
27686 else
27687 {
27688 /* If this function will never be unwound, then mark it as such.
27689 The same condition is used in arm_unwind_emit to suppress
27690 the frame annotations. */
27691 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27692 && (TREE_NOTHROW (current_function_decl)
27693 || crtl->all_throwers_are_sibcalls))
27694 fputs("\t.cantunwind\n", f);
27695
27696 fputs ("\t.fnend\n", f);
27697 }
27698 }
27699
27700 static bool
27701 arm_emit_tls_decoration (FILE *fp, rtx x)
27702 {
27703 enum tls_reloc reloc;
27704 rtx val;
27705
27706 val = XVECEXP (x, 0, 0);
27707 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27708
27709 output_addr_const (fp, val);
27710
27711 switch (reloc)
27712 {
27713 case TLS_GD32:
27714 fputs ("(tlsgd)", fp);
27715 break;
27716 case TLS_LDM32:
27717 fputs ("(tlsldm)", fp);
27718 break;
27719 case TLS_LDO32:
27720 fputs ("(tlsldo)", fp);
27721 break;
27722 case TLS_IE32:
27723 fputs ("(gottpoff)", fp);
27724 break;
27725 case TLS_LE32:
27726 fputs ("(tpoff)", fp);
27727 break;
27728 case TLS_DESCSEQ:
27729 fputs ("(tlsdesc)", fp);
27730 break;
27731 default:
27732 gcc_unreachable ();
27733 }
27734
27735 switch (reloc)
27736 {
27737 case TLS_GD32:
27738 case TLS_LDM32:
27739 case TLS_IE32:
27740 case TLS_DESCSEQ:
27741 fputs (" + (. - ", fp);
27742 output_addr_const (fp, XVECEXP (x, 0, 2));
27743 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27744 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27745 output_addr_const (fp, XVECEXP (x, 0, 3));
27746 fputc (')', fp);
27747 break;
27748 default:
27749 break;
27750 }
27751
27752 return TRUE;
27753 }
27754
27755 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27756
27757 static void
27758 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27759 {
27760 gcc_assert (size == 4);
27761 fputs ("\t.word\t", file);
27762 output_addr_const (file, x);
27763 fputs ("(tlsldo)", file);
27764 }
27765
27766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27767
27768 static bool
27769 arm_output_addr_const_extra (FILE *fp, rtx x)
27770 {
27771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27772 return arm_emit_tls_decoration (fp, x);
27773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27774 {
27775 char label[256];
27776 int labelno = INTVAL (XVECEXP (x, 0, 0));
27777
27778 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27779 assemble_name_raw (fp, label);
27780
27781 return TRUE;
27782 }
27783 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27784 {
27785 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27786 if (GOT_PCREL)
27787 fputs ("+.", fp);
27788 fputs ("-(", fp);
27789 output_addr_const (fp, XVECEXP (x, 0, 0));
27790 fputc (')', fp);
27791 return TRUE;
27792 }
27793 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27794 {
27795 output_addr_const (fp, XVECEXP (x, 0, 0));
27796 if (GOT_PCREL)
27797 fputs ("+.", fp);
27798 fputs ("-(", fp);
27799 output_addr_const (fp, XVECEXP (x, 0, 1));
27800 fputc (')', fp);
27801 return TRUE;
27802 }
27803 else if (GET_CODE (x) == CONST_VECTOR)
27804 return arm_emit_vector_const (fp, x);
27805
27806 return FALSE;
27807 }
27808
27809 /* Output assembly for a shift instruction.
27810 SET_FLAGS determines how the instruction modifies the condition codes.
27811 0 - Do not set condition codes.
27812 1 - Set condition codes.
27813 2 - Use smallest instruction. */
27814 const char *
27815 arm_output_shift(rtx * operands, int set_flags)
27816 {
27817 char pattern[100];
27818 static const char flag_chars[3] = {'?', '.', '!'};
27819 const char *shift;
27820 HOST_WIDE_INT val;
27821 char c;
27822
27823 c = flag_chars[set_flags];
27824 shift = shift_op(operands[3], &val);
27825 if (shift)
27826 {
27827 if (val != -1)
27828 operands[2] = GEN_INT(val);
27829 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27830 }
27831 else
27832 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27833
27834 output_asm_insn (pattern, operands);
27835 return "";
27836 }
27837
27838 /* Output assembly for a WMMX immediate shift instruction. */
27839 const char *
27840 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27841 {
27842 int shift = INTVAL (operands[2]);
27843 char templ[50];
27844 machine_mode opmode = GET_MODE (operands[0]);
27845
27846 gcc_assert (shift >= 0);
27847
27848 /* If the shift value is larger than what the register versions allow
27849 (> 63 for the D qualifier, > 31 for W, or > 15 for H), handle it specially. */
27850 if (((opmode == V4HImode) && (shift > 15))
27851 || ((opmode == V2SImode) && (shift > 31))
27852 || ((opmode == DImode) && (shift > 63)))
27853 {
27854 if (wror_or_wsra)
27855 {
27856 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27857 output_asm_insn (templ, operands);
27858 if (opmode == DImode)
27859 {
27860 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27861 output_asm_insn (templ, operands);
27862 }
27863 }
27864 else
27865 {
27866 /* The destination register will contain all zeros. */
27867 sprintf (templ, "wzero\t%%0");
27868 output_asm_insn (templ, operands);
27869 }
27870 return "";
27871 }
27872
27873 if ((opmode == DImode) && (shift > 32))
27874 {
27875 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27876 output_asm_insn (templ, operands);
27877 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27878 output_asm_insn (templ, operands);
27879 }
27880 else
27881 {
27882 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27883 output_asm_insn (templ, operands);
27884 }
27885 return "";
27886 }
27887
27888 /* Output assembly for a WMMX tinsr instruction. */
27889 const char *
27890 arm_output_iwmmxt_tinsr (rtx *operands)
27891 {
27892 int mask = INTVAL (operands[3]);
27893 int i;
27894 char templ[50];
27895 int units = mode_nunits[GET_MODE (operands[0])];
27896 gcc_assert ((mask & (mask - 1)) == 0);
27897 for (i = 0; i < units; ++i)
27898 {
27899 if ((mask & 0x01) == 1)
27900 {
27901 break;
27902 }
27903 mask >>= 1;
27904 }
27905 gcc_assert (i < units);
27906 {
27907 switch (GET_MODE (operands[0]))
27908 {
27909 case E_V8QImode:
27910 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27911 break;
27912 case E_V4HImode:
27913 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27914 break;
27915 case E_V2SImode:
27916 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27917 break;
27918 default:
27919 gcc_unreachable ();
27920 break;
27921 }
27922 output_asm_insn (templ, operands);
27923 }
27924 return "";
27925 }
27926
27927 /* Output a Thumb-1 casesi dispatch sequence. */
27928 const char *
27929 thumb1_output_casesi (rtx *operands)
27930 {
27931 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27932
27933 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27934
27935 switch (GET_MODE(diff_vec))
27936 {
27937 case E_QImode:
27938 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27939 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27940 case E_HImode:
27941 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27942 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27943 case E_SImode:
27944 return "bl\t%___gnu_thumb1_case_si";
27945 default:
27946 gcc_unreachable ();
27947 }
27948 }
27949
27950 /* Output a Thumb-2 casesi instruction. */
27951 const char *
27952 thumb2_output_casesi (rtx *operands)
27953 {
27954 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27955
27956 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27957
27958 output_asm_insn ("cmp\t%0, %1", operands);
27959 output_asm_insn ("bhi\t%l3", operands);
27960 switch (GET_MODE(diff_vec))
27961 {
27962 case E_QImode:
27963 return "tbb\t[%|pc, %0]";
27964 case E_HImode:
27965 return "tbh\t[%|pc, %0, lsl #1]";
27966 case E_SImode:
27967 if (flag_pic)
27968 {
27969 output_asm_insn ("adr\t%4, %l2", operands);
27970 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27971 output_asm_insn ("add\t%4, %4, %5", operands);
27972 return "bx\t%4";
27973 }
27974 else
27975 {
27976 output_asm_insn ("adr\t%4, %l2", operands);
27977 return "ldr\t%|pc, [%4, %0, lsl #2]";
27978 }
27979 default:
27980 gcc_unreachable ();
27981 }
27982 }
27983
27984 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27985 per-core tuning structs. */
27986 static int
27987 arm_issue_rate (void)
27988 {
27989 return current_tune->issue_rate;
27990 }
27991
27992 /* Return how many instructions the scheduler should look ahead to choose
27993 the best one. */
27994 static int
27995 arm_first_cycle_multipass_dfa_lookahead (void)
27996 {
27997 int issue_rate = arm_issue_rate ();
27998
27999 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28000 }
28001
28002 /* Enable modeling of L2 auto-prefetcher. */
28003 static int
28004 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28005 {
28006 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28007 }
28008
28009 const char *
28010 arm_mangle_type (const_tree type)
28011 {
28012 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28013 has to be mangled as if it is in the "std" namespace. */
28014 if (TARGET_AAPCS_BASED
28015 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28016 return "St9__va_list";
28017
28018 /* Half-precision float. */
28019 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28020 return "Dh";
28021
28022 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28023 builtin type. */
28024 if (TYPE_NAME (type) != NULL)
28025 return arm_mangle_builtin_type (type);
28026
28027 /* Use the default mangling. */
28028 return NULL;
28029 }
28030
28031 /* Order of allocation of core registers for Thumb: this allocation is
28032 written over the corresponding initial entries of the array
28033 initialized with REG_ALLOC_ORDER. We allocate all low registers
28034 first. Saving and restoring a low register is usually cheaper than
28035 using a call-clobbered high register. */
28036
28037 static const int thumb_core_reg_alloc_order[] =
28038 {
28039 3, 2, 1, 0, 4, 5, 6, 7,
28040 12, 14, 8, 9, 10, 11
28041 };
28042
28043 /* Adjust register allocation order when compiling for Thumb. */
28044
28045 void
28046 arm_order_regs_for_local_alloc (void)
28047 {
28048 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28049 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28050 if (TARGET_THUMB)
28051 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28052 sizeof (thumb_core_reg_alloc_order));
28053 }
28054
28055 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28056
28057 bool
28058 arm_frame_pointer_required (void)
28059 {
28060 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28061 return true;
28062
28063 /* If the function receives nonlocal gotos, it needs to save the frame
28064 pointer in the nonlocal_goto_save_area object. */
28065 if (cfun->has_nonlocal_label)
28066 return true;
28067
28068 /* The frame pointer is required for non-leaf APCS frames. */
28069 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28070 return true;
28071
28072 /* If we are probing the stack in the prologue, we will have a faulting
28073 instruction prior to the stack adjustment and this requires a frame
28074 pointer if we want to catch the exception using the EABI unwinder. */
28075 if (!IS_INTERRUPT (arm_current_func_type ())
28076 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28077 || flag_stack_clash_protection)
28078 && arm_except_unwind_info (&global_options) == UI_TARGET
28079 && cfun->can_throw_non_call_exceptions)
28080 {
28081 HOST_WIDE_INT size = get_frame_size ();
28082
28083 /* That's irrelevant if there is no stack adjustment. */
28084 if (size <= 0)
28085 return false;
28086
28087 /* That's relevant only if there is a stack probe. */
28088 if (crtl->is_leaf && !cfun->calls_alloca)
28089 {
28090 /* We don't have the final size of the frame so adjust. */
28091 size += 32 * UNITS_PER_WORD;
28092 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28093 return true;
28094 }
28095 else
28096 return true;
28097 }
28098
28099 return false;
28100 }
28101
28102 /* Thumb-1 is the only target that cannot support conditional execution,
28103 so return true if the target is not Thumb-1. */
28104 static bool
28105 arm_have_conditional_execution (void)
28106 {
28107 return !TARGET_THUMB1;
28108 }
28109
28110 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28111 static HOST_WIDE_INT
28112 arm_vector_alignment (const_tree type)
28113 {
28114 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28115
28116 if (TARGET_AAPCS_BASED)
28117 align = MIN (align, 64);
28118
28119 return align;
28120 }
28121
28122 static unsigned int
28123 arm_autovectorize_vector_sizes (void)
28124 {
28125 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28126 }
28127
28128 static bool
28129 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28130 {
28131 /* Vectors which aren't in packed structures will not be less aligned than
28132 the natural alignment of their element type, so this is safe. */
28133 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28134 return !is_packed;
28135
28136 return default_builtin_vector_alignment_reachable (type, is_packed);
28137 }
28138
28139 static bool
28140 arm_builtin_support_vector_misalignment (machine_mode mode,
28141 const_tree type, int misalignment,
28142 bool is_packed)
28143 {
28144 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28145 {
28146 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28147
28148 if (is_packed)
28149 return align == 1;
28150
28151 /* If the misalignment is unknown, we should be able to handle the access
28152 so long as it is not to a member of a packed data structure. */
28153 if (misalignment == -1)
28154 return true;
28155
28156 /* Return true if the misalignment is a multiple of the natural alignment
28157 of the vector's element type. This is probably always going to be
28158 true in practice, since we've already established that this isn't a
28159 packed access. */
28160 return ((misalignment % align) == 0);
28161 }
28162
28163 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28164 is_packed);
28165 }
28166
28167 static void
28168 arm_conditional_register_usage (void)
28169 {
28170 int regno;
28171
28172 if (TARGET_THUMB1 && optimize_size)
28173 {
28174 /* When optimizing for size on Thumb-1, it's better not
28175 to use the HI regs, because of the overhead of
28176 stacking them. */
28177 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28178 fixed_regs[regno] = call_used_regs[regno] = 1;
28179 }
28180
28181 /* The link register can be clobbered by any branch insn,
28182 but we have no way to track that at present, so mark
28183 it as unavailable. */
28184 if (TARGET_THUMB1)
28185 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28186
28187 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28188 {
28189 /* VFPv3 registers are disabled when earlier VFP
28190 versions are selected due to the definition of
28191 LAST_VFP_REGNUM. */
28192 for (regno = FIRST_VFP_REGNUM;
28193 regno <= LAST_VFP_REGNUM; ++ regno)
28194 {
28195 fixed_regs[regno] = 0;
28196 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28197 || regno >= FIRST_VFP_REGNUM + 32;
28198 }
28199 }
28200
28201 if (TARGET_REALLY_IWMMXT)
28202 {
28203 regno = FIRST_IWMMXT_GR_REGNUM;
28204 /* The 2002/10/09 revision of the XScale ABI has wCG0
28205 and wCG1 as call-preserved registers. The 2002/11/21
28206 revision changed this so that all wCG registers are
28207 scratch registers. */
28208 for (regno = FIRST_IWMMXT_GR_REGNUM;
28209 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28210 fixed_regs[regno] = 0;
28211 /* The XScale ABI has wR0 - wR9 as scratch registers,
28212 the rest as call-preserved registers. */
28213 for (regno = FIRST_IWMMXT_REGNUM;
28214 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28215 {
28216 fixed_regs[regno] = 0;
28217 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28218 }
28219 }
28220
28221 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28222 {
28223 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28224 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28225 }
28226 else if (TARGET_APCS_STACK)
28227 {
28228 fixed_regs[10] = 1;
28229 call_used_regs[10] = 1;
28230 }
28231 /* -mcaller-super-interworking reserves r11 for calls to
28232 _interwork_r11_call_via_rN(). Making the register global
28233 is an easy way of ensuring that it remains valid for all
28234 calls. */
28235 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28236 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28237 {
28238 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28239 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28240 if (TARGET_CALLER_INTERWORKING)
28241 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28242 }
28243 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28244 }
28245
28246 static reg_class_t
28247 arm_preferred_rename_class (reg_class_t rclass)
28248 {
28249 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28250 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
28251 so that code size can be reduced. */
28252 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28253 return LO_REGS;
28254 else
28255 return NO_REGS;
28256 }
28257
28258 /* Compute the attribute "length" of insn "*push_multi".
28259 So this function MUST be kept in sync with that insn pattern. */
28260 int
28261 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28262 {
28263 int i, regno, hi_reg;
28264 int num_saves = XVECLEN (parallel_op, 0);
28265
28266 /* ARM mode. */
28267 if (TARGET_ARM)
28268 return 4;
28269 /* Thumb1 mode. */
28270 if (TARGET_THUMB1)
28271 return 2;
28272
28273 /* Thumb2 mode. */
28274 regno = REGNO (first_op);
28275 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the
28276 register list fits in 8 bits. Normally this means all registers in the
28277 list must be LO_REGS, that is (R0-R7). If any HI_REGS are used, then we
28278 must use 32-bit encodings. The one exception is PUSH: LR, although in
28279 HI_REGS, can still be used with the 16-bit encoding. */
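/* For instance, "push {r0-r7, lr}" can use the 16-bit encoding and so has
   length 2, while "push {r0, r8}" requires the 32-bit encoding and has
   length 4. */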
28280 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28281 for (i = 1; i < num_saves && !hi_reg; i++)
28282 {
28283 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28284 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28285 }
28286
28287 if (!hi_reg)
28288 return 2;
28289 return 4;
28290 }
28291
28292 /* Compute the attribute "length" of insn. Currently, this function is used
28293 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28294 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28295 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28296 true if OPERANDS contains an insn that explicitly updates the base register. */
28297
28298 int
28299 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28300 {
28301 /* ARM mode. */
28302 if (TARGET_ARM)
28303 return 4;
28304 /* Thumb1 mode. */
28305 if (TARGET_THUMB1)
28306 return 2;
28307
28308 rtx parallel_op = operands[0];
28309 /* Initialize to the index of the last element of the PARALLEL. */
28310 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28311 /* Initialize to the base register number. */
28312 unsigned regno = REGNO (operands[1]);
28313 /* Skip the return and write-back patterns; only the register pop
28314 patterns are needed for the analysis below. */
28315 unsigned first_indx = 0;
28316 first_indx += return_pc ? 1 : 0;
28317 first_indx += write_back_p ? 1 : 0;
28318
28319 /* A pop operation can be done through LDM or POP. If the base register is SP
28320 and write-back is used, then an LDM is an alias of POP. */
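/* E.g. "ldmia sp!, {r4, r5, pc}" is the same operation as
   "pop {r4, r5, pc}" and can use the 16-bit encoding, whereas an LDM
   whose base register is a high register always needs the 32-bit
   encoding. */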
28321 bool pop_p = (regno == SP_REGNUM && write_back_p);
28322 bool ldm_p = !pop_p;
28323
28324 /* Check base register for LDM. */
28325 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28326 return 4;
28327
28328 /* Check each register in the list. */
28329 for (; indx >= first_indx; indx--)
28330 {
28331 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28332 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28333 comment in arm_attr_length_push_multi. */
28334 if (REGNO_REG_CLASS (regno) == HI_REGS
28335 && (regno != PC_REGNUM || ldm_p))
28336 return 4;
28337 }
28338
28339 return 2;
28340 }
28341
28342 /* Compute the number of instructions emitted by output_move_double. */
28343 int
28344 arm_count_output_move_double_insns (rtx *operands)
28345 {
28346 int count;
28347 rtx ops[2];
28348 /* output_move_double may modify the operands array, so call it
28349 here on a copy of the array. */
28350 ops[0] = operands[0];
28351 ops[1] = operands[1];
28352 output_move_double (ops, false, &count);
28353 return count;
28354 }
28355
28356 int
28357 vfp3_const_double_for_fract_bits (rtx operand)
28358 {
28359 REAL_VALUE_TYPE r0;
28360
28361 if (!CONST_DOUBLE_P (operand))
28362 return 0;
28363
28364 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28365 if (exact_real_inverse (DFmode, &r0)
28366 && !REAL_VALUE_NEGATIVE (r0))
28367 {
28368 if (exact_real_truncate (DFmode, &r0))
28369 {
28370 HOST_WIDE_INT value = real_to_integer (&r0);
28371 value = value & 0xffffffff;
28372 if ((value != 0) && ( (value & (value - 1)) == 0))
28373 {
28374 int ret = exact_log2 (value);
28375 gcc_assert (IN_RANGE (ret, 0, 31));
28376 return ret;
28377 }
28378 }
28379 }
28380 return 0;
28381 }
28382
28383 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28384 log2 is in [1, 32], return that log2. Otherwise return -1.
28385 This is used in the patterns for vcvt.s32.f32 floating-point to
28386 fixed-point conversions. */
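/* A couple of worked values: 8.0 yields 3 and 2.0 yields 1, while 3.0
   (not a power of 2) and 1.0 (log2 of 0, outside [1, 32]) yield -1. */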
28387
28388 int
28389 vfp3_const_double_for_bits (rtx x)
28390 {
28391 const REAL_VALUE_TYPE *r;
28392
28393 if (!CONST_DOUBLE_P (x))
28394 return -1;
28395
28396 r = CONST_DOUBLE_REAL_VALUE (x);
28397
28398 if (REAL_VALUE_NEGATIVE (*r)
28399 || REAL_VALUE_ISNAN (*r)
28400 || REAL_VALUE_ISINF (*r)
28401 || !real_isinteger (r, SFmode))
28402 return -1;
28403
28404 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28405
28406 /* The exact_log2 above will have returned -1 if this is
28407 not an exact log2. */
28408 if (!IN_RANGE (hwint, 1, 32))
28409 return -1;
28410
28411 return hwint;
28412 }
28413
28414 \f
28415 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28416
28417 static void
28418 arm_pre_atomic_barrier (enum memmodel model)
28419 {
28420 if (need_atomic_barrier_p (model, true))
28421 emit_insn (gen_memory_barrier ());
28422 }
28423
28424 static void
28425 arm_post_atomic_barrier (enum memmodel model)
28426 {
28427 if (need_atomic_barrier_p (model, false))
28428 emit_insn (gen_memory_barrier ());
28429 }
28430
28431 /* Emit the load-exclusive and store-exclusive instructions.
28432 Use acquire and release versions if necessary. */
28433
28434 static void
28435 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28436 {
28437 rtx (*gen) (rtx, rtx);
28438
28439 if (acq)
28440 {
28441 switch (mode)
28442 {
28443 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28444 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28445 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28446 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28447 default:
28448 gcc_unreachable ();
28449 }
28450 }
28451 else
28452 {
28453 switch (mode)
28454 {
28455 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28456 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28457 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28458 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28459 default:
28460 gcc_unreachable ();
28461 }
28462 }
28463
28464 emit_insn (gen (rval, mem));
28465 }
28466
28467 static void
28468 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28469 rtx mem, bool rel)
28470 {
28471 rtx (*gen) (rtx, rtx, rtx);
28472
28473 if (rel)
28474 {
28475 switch (mode)
28476 {
28477 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28478 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28479 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28480 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28481 default:
28482 gcc_unreachable ();
28483 }
28484 }
28485 else
28486 {
28487 switch (mode)
28488 {
28489 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28490 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28491 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28492 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28493 default:
28494 gcc_unreachable ();
28495 }
28496 }
28497
28498 emit_insn (gen (bval, rval, mem));
28499 }
28500
28501 /* Emit INSN as a jump and mark it as unlikely to be taken. */
28502
28503 static void
28504 emit_unlikely_jump (rtx insn)
28505 {
28506 rtx_insn *jump = emit_jump_insn (insn);
28507 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28508 }
28509
28510 /* Expand a compare and swap pattern. */
28511
28512 void
28513 arm_expand_compare_and_swap (rtx operands[])
28514 {
28515 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28516 machine_mode mode;
28517 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28518
28519 bval = operands[0];
28520 rval = operands[1];
28521 mem = operands[2];
28522 oldval = operands[3];
28523 newval = operands[4];
28524 is_weak = operands[5];
28525 mod_s = operands[6];
28526 mod_f = operands[7];
28527 mode = GET_MODE (mem);
28528
28529 /* Normally the succ memory model must be stronger than fail, but in the
28530 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28531 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28532
28533 if (TARGET_HAVE_LDACQ
28534 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28535 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28536 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28537
28538 switch (mode)
28539 {
28540 case E_QImode:
28541 case E_HImode:
28542 /* For narrow modes, we're going to perform the comparison in SImode,
28543 so do the zero-extension now. */
28544 rval = gen_reg_rtx (SImode);
28545 oldval = convert_modes (SImode, mode, oldval, true);
28546 /* FALLTHRU */
28547
28548 case E_SImode:
28549 /* Force the value into a register if needed. We waited until after
28550 the zero-extension above to do this properly. */
28551 if (!arm_add_operand (oldval, SImode))
28552 oldval = force_reg (SImode, oldval);
28553 break;
28554
28555 case E_DImode:
28556 if (!cmpdi_operand (oldval, mode))
28557 oldval = force_reg (mode, oldval);
28558 break;
28559
28560 default:
28561 gcc_unreachable ();
28562 }
28563
28564 if (TARGET_THUMB1)
28565 {
28566 switch (mode)
28567 {
28568 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28569 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28570 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28571 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28572 default:
28573 gcc_unreachable ();
28574 }
28575 }
28576 else
28577 {
28578 switch (mode)
28579 {
28580 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28581 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28582 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28583 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28584 default:
28585 gcc_unreachable ();
28586 }
28587 }
28588
28589 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28590 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28591
28592 if (mode == QImode || mode == HImode)
28593 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28594
28595 /* In all cases, we arrange for success to be signaled by Z set.
28596 This arrangement allows for the boolean result to be used directly
28597 in a subsequent branch, post optimization. For Thumb-1 targets, the
28598 boolean negation of the result is also stored in bval because the Thumb-1
28599 backend lacks dependency tracking for the CC flag, as flag-setting is not
28600 represented at the RTL level. */
28601 if (TARGET_THUMB1)
28602 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28603 else
28604 {
28605 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28606 emit_insn (gen_rtx_SET (bval, x));
28607 }
28608 }
28609
28610 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28611 another memory store between the load-exclusive and store-exclusive can
28612 reset the monitor from Exclusive to Open state. This means we must wait
28613 until after reload to split the pattern, lest we get a register spill in
28614 the middle of the atomic sequence. Success of the compare and swap is
28615 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28616 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28617 atomic_compare_and_swapmode standard pattern in operand 0). */
28618
28619 void
28620 arm_split_compare_and_swap (rtx operands[])
28621 {
28622 rtx rval, mem, oldval, newval, neg_bval;
28623 machine_mode mode;
28624 enum memmodel mod_s, mod_f;
28625 bool is_weak;
28626 rtx_code_label *label1, *label2;
28627 rtx x, cond;
28628
28629 rval = operands[1];
28630 mem = operands[2];
28631 oldval = operands[3];
28632 newval = operands[4];
28633 is_weak = (operands[5] != const0_rtx);
28634 mod_s = memmodel_from_int (INTVAL (operands[6]));
28635 mod_f = memmodel_from_int (INTVAL (operands[7]));
28636 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28637 mode = GET_MODE (mem);
28638
28639 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28640
28641 bool use_acquire = TARGET_HAVE_LDACQ
28642 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28643 || is_mm_release (mod_s));
28644
28645 bool use_release = TARGET_HAVE_LDACQ
28646 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28647 || is_mm_acquire (mod_s));
28648
28649 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28650 a full barrier is emitted after the store-release. */
28651 if (is_armv8_sync)
28652 use_acquire = false;
28653
28654 /* Checks whether a barrier is needed and emits one accordingly. */
28655 if (!(use_acquire || use_release))
28656 arm_pre_atomic_barrier (mod_s);
28657
28658 label1 = NULL;
28659 if (!is_weak)
28660 {
28661 label1 = gen_label_rtx ();
28662 emit_label (label1);
28663 }
28664 label2 = gen_label_rtx ();
28665
28666 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28667
28668 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1) if oldval != rval,
28669 as required to communicate with arm_expand_compare_and_swap. */
28670 if (TARGET_32BIT)
28671 {
28672 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28673 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28674 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28675 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28676 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28677 }
28678 else
28679 {
28680 emit_move_insn (neg_bval, const1_rtx);
28681 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28682 if (thumb1_cmpneg_operand (oldval, SImode))
28683 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28684 label2, cond));
28685 else
28686 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28687 }
28688
28689 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28690
28691 /* Weak or strong, we want EQ to be true for success, so that we
28692 match the flags that we got from the compare above. */
28693 if (TARGET_32BIT)
28694 {
28695 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28696 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28697 emit_insn (gen_rtx_SET (cond, x));
28698 }
28699
28700 if (!is_weak)
28701 {
28702 /* Z is set to boolean value of !neg_bval, as required to communicate
28703 with arm_expand_compare_and_swap. */
28704 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28705 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28706 }
28707
28708 if (!is_mm_relaxed (mod_f))
28709 emit_label (label2);
28710
28711 /* Checks whether a barrier is needed and emits one accordingly. */
28712 if (is_armv8_sync
28713 || !(use_acquire || use_release))
28714 arm_post_atomic_barrier (mod_s);
28715
28716 if (is_mm_relaxed (mod_f))
28717 emit_label (label2);
28718 }
28719
28720 /* Split an atomic operation pattern. Operation is given by CODE and is one
28721 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28722 operation). Operation is performed on the content at MEM and on VALUE
28723 following the memory model MODEL_RTX. The content at MEM before and after
28724 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28725 success of the operation is returned in COND. Using a scratch register or
28726 an operand register for these determines what result is returned for that
28727 pattern. */
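/* For illustration (a sketch, not guaranteed output of this splitter): a call
   such as __atomic_fetch_add (p, 1, __ATOMIC_RELAXED) on an int *P is
   normally split into a loop of the shape

     1:  ldrex   r3, [r0]
         add     r2, r3, #1
         strex   r1, r2, [r0]
         cmp     r1, #0
         bne     1b

   with the old value left in r3 (OLD_OUT) and the new value in r2 (NEW_OUT);
   the register names are purely illustrative.  */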
28728
28729 void
28730 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28731 rtx value, rtx model_rtx, rtx cond)
28732 {
28733 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28734 machine_mode mode = GET_MODE (mem);
28735 machine_mode wmode = (mode == DImode ? DImode : SImode);
28736 rtx_code_label *label;
28737 bool all_low_regs, bind_old_new;
28738 rtx x;
28739
28740 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28741
28742 bool use_acquire = TARGET_HAVE_LDACQ
28743 && !(is_mm_relaxed (model) || is_mm_consume (model)
28744 || is_mm_release (model));
28745
28746 bool use_release = TARGET_HAVE_LDACQ
28747 && !(is_mm_relaxed (model) || is_mm_consume (model)
28748 || is_mm_acquire (model));
28749
28750 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28751 a full barrier is emitted after the store-release. */
28752 if (is_armv8_sync)
28753 use_acquire = false;
28754
28755 /* Checks whether a barrier is needed and emits one accordingly. */
28756 if (!(use_acquire || use_release))
28757 arm_pre_atomic_barrier (model);
28758
28759 label = gen_label_rtx ();
28760 emit_label (label);
28761
28762 if (new_out)
28763 new_out = gen_lowpart (wmode, new_out);
28764 if (old_out)
28765 old_out = gen_lowpart (wmode, old_out);
28766 else
28767 old_out = new_out;
28768 value = simplify_gen_subreg (wmode, value, mode, 0);
28769
28770 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28771
28772 /* Does the operation require destination and first operand to use the same
28773 register? This is decided by register constraints of relevant insn
28774 patterns in thumb1.md. */
28775 gcc_assert (!new_out || REG_P (new_out));
28776 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28777 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28778 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28779 bind_old_new =
28780 (TARGET_THUMB1
28781 && code != SET
28782 && code != MINUS
28783 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28784
28785 /* We want to return the old value while putting the result of the operation
28786 in the same register as the old value so copy the old value over to the
28787 destination register and use that register for the operation. */
28788 if (old_out && bind_old_new)
28789 {
28790 emit_move_insn (new_out, old_out);
28791 old_out = new_out;
28792 }
28793
28794 switch (code)
28795 {
28796 case SET:
28797 new_out = value;
28798 break;
28799
28800 case NOT:
28801 x = gen_rtx_AND (wmode, old_out, value);
28802 emit_insn (gen_rtx_SET (new_out, x));
28803 x = gen_rtx_NOT (wmode, new_out);
28804 emit_insn (gen_rtx_SET (new_out, x));
28805 break;
28806
28807 case MINUS:
28808 if (CONST_INT_P (value))
28809 {
28810 value = GEN_INT (-INTVAL (value));
28811 code = PLUS;
28812 }
28813 /* FALLTHRU */
28814
28815 case PLUS:
28816 if (mode == DImode)
28817 {
28818 /* DImode plus/minus need to clobber flags. */
28819 /* The adddi3 and subdi3 patterns are incorrectly written so that
28820 they require matching operands, even when we could easily support
28821 three operands. Thankfully, this can be fixed up post-splitting,
28822 as the individual add+adc patterns do accept three operands and
28823 post-reload cprop can make these moves go away. */
28824 emit_move_insn (new_out, old_out);
28825 if (code == PLUS)
28826 x = gen_adddi3 (new_out, new_out, value);
28827 else
28828 x = gen_subdi3 (new_out, new_out, value);
28829 emit_insn (x);
28830 break;
28831 }
28832 /* FALLTHRU */
28833
28834 default:
28835 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28836 emit_insn (gen_rtx_SET (new_out, x));
28837 break;
28838 }
28839
28840 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28841 use_release);
28842
28843 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28844 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28845
28846 /* Checks whether a barrier is needed and emits one accordingly. */
28847 if (is_armv8_sync
28848 || !(use_acquire || use_release))
28849 arm_post_atomic_barrier (model);
28850 }
28851 \f
28852 #define MAX_VECT_LEN 16
28853
28854 struct expand_vec_perm_d
28855 {
28856 rtx target, op0, op1;
28857 auto_vec_perm_indices perm;
28858 machine_mode vmode;
28859 bool one_vector_p;
28860 bool testing_p;
28861 };
28862
28863 /* Generate a variable permutation. */
28864
28865 static void
28866 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28867 {
28868 machine_mode vmode = GET_MODE (target);
28869 bool one_vector_p = rtx_equal_p (op0, op1);
28870
28871 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28872 gcc_checking_assert (GET_MODE (op0) == vmode);
28873 gcc_checking_assert (GET_MODE (op1) == vmode);
28874 gcc_checking_assert (GET_MODE (sel) == vmode);
28875 gcc_checking_assert (TARGET_NEON);
28876
28877 if (one_vector_p)
28878 {
28879 if (vmode == V8QImode)
28880 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28881 else
28882 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28883 }
28884 else
28885 {
28886 rtx pair;
28887
28888 if (vmode == V8QImode)
28889 {
28890 pair = gen_reg_rtx (V16QImode);
28891 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28892 pair = gen_lowpart (TImode, pair);
28893 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28894 }
28895 else
28896 {
28897 pair = gen_reg_rtx (OImode);
28898 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28899 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28900 }
28901 }
28902 }
28903
28904 void
28905 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28906 {
28907 machine_mode vmode = GET_MODE (target);
28908 unsigned int nelt = GET_MODE_NUNITS (vmode);
28909 bool one_vector_p = rtx_equal_p (op0, op1);
28910 rtx mask;
28911
28912 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28913 numbering of elements for big-endian, we must reverse the order. */
28914 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28915
28916 /* The VTBL instruction does not use a modulo index, so we must take care
28917 of that ourselves. */
28918 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28919 mask = gen_const_vec_duplicate (vmode, mask);
28920 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28921
28922 arm_expand_vec_perm_1 (target, op0, op1, sel);
28923 }
28924
28925 /* Map lane ordering between architectural lane order and GCC lane order,
28926 taking into account the ABI. See comment above output_move_neon for details. */
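/* For example, on a big-endian target neon_endian_lane_map (V4SImode, 0)
   is 1: the lane is first reversed (3 - 0 = 3) and then, because V4SImode
   is a 16-byte vector, XORed with nelems / 2 (3 ^ 2 = 1).  On little-endian
   targets the mapping is the identity.  */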
28927
28928 static int
28929 neon_endian_lane_map (machine_mode mode, int lane)
28930 {
28931 if (BYTES_BIG_ENDIAN)
28932 {
28933 int nelems = GET_MODE_NUNITS (mode);
28934 /* Reverse lane order. */
28935 lane = (nelems - 1 - lane);
28936 /* Reverse D register order, to match ABI. */
28937 if (GET_MODE_SIZE (mode) == 16)
28938 lane = lane ^ (nelems / 2);
28939 }
28940 return lane;
28941 }
28942
28943 /* Some permutations index into pairs of vectors; this is a helper function
28944 to map indexes into those pairs of vectors. */
28945
28946 static int
28947 neon_pair_endian_lane_map (machine_mode mode, int lane)
28948 {
28949 int nelem = GET_MODE_NUNITS (mode);
28950 if (BYTES_BIG_ENDIAN)
28951 lane =
28952 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28953 return lane;
28954 }
28955
28956 /* Generate or test for an insn that supports a constant permutation. */
28957
28958 /* Recognize patterns for the VUZP insns. */
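/* For example, for two V8QImode inputs the even-lane selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (or the odd-lane one starting at 1)
   matches a single VUZP on a little-endian target.  */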
28959
28960 static bool
28961 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28962 {
28963 unsigned int i, odd, mask, nelt = d->perm.length ();
28964 rtx out0, out1, in0, in1;
28965 rtx (*gen)(rtx, rtx, rtx, rtx);
28966 int first_elem;
28967 int swap_nelt;
28968
28969 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28970 return false;
28971
28972 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28973 big-endian pattern on 64-bit vectors, so we correct for that. */
28974 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28975 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28976
28977 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28978
28979 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28980 odd = 0;
28981 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28982 odd = 1;
28983 else
28984 return false;
28985 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28986
28987 for (i = 0; i < nelt; i++)
28988 {
28989 unsigned elt =
28990 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28991 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28992 return false;
28993 }
28994
28995 /* Success! */
28996 if (d->testing_p)
28997 return true;
28998
28999 switch (d->vmode)
29000 {
29001 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29002 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29003 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29004 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29005 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29006 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29007 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29008 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29009 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29010 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29011 default:
29012 gcc_unreachable ();
29013 }
29014
29015 in0 = d->op0;
29016 in1 = d->op1;
29017 if (swap_nelt != 0)
29018 std::swap (in0, in1);
29019
29020 out0 = d->target;
29021 out1 = gen_reg_rtx (d->vmode);
29022 if (odd)
29023 std::swap (out0, out1);
29024
29025 emit_insn (gen (out0, in0, in1, out1));
29026 return true;
29027 }
29028
29029 /* Recognize patterns for the VZIP insns. */
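/* For example, for two V8QImode inputs the "zip low" selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } matches a single VZIP on a little-endian
   target; { 4, 12, 5, 13, 6, 14, 7, 15 } is the corresponding "zip high"
   form.  */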
29030
29031 static bool
29032 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29033 {
29034 unsigned int i, high, mask, nelt = d->perm.length ();
29035 rtx out0, out1, in0, in1;
29036 rtx (*gen)(rtx, rtx, rtx, rtx);
29037 int first_elem;
29038 bool is_swapped;
29039
29040 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29041 return false;
29042
29043 is_swapped = BYTES_BIG_ENDIAN;
29044
29045 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29046
29047 high = nelt / 2;
29048 if (first_elem == neon_endian_lane_map (d->vmode, high))
29049 ;
29050 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29051 high = 0;
29052 else
29053 return false;
29054 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29055
29056 for (i = 0; i < nelt / 2; i++)
29057 {
29058 unsigned elt =
29059 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29060 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29061 != elt)
29062 return false;
29063 elt =
29064 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29065 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29066 != elt)
29067 return false;
29068 }
29069
29070 /* Success! */
29071 if (d->testing_p)
29072 return true;
29073
29074 switch (d->vmode)
29075 {
29076 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29077 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29078 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29079 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29080 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29081 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29082 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29083 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29084 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29085 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29086 default:
29087 gcc_unreachable ();
29088 }
29089
29090 in0 = d->op0;
29091 in1 = d->op1;
29092 if (is_swapped)
29093 std::swap (in0, in1);
29094
29095 out0 = d->target;
29096 out1 = gen_reg_rtx (d->vmode);
29097 if (high)
29098 std::swap (out0, out1);
29099
29100 emit_insn (gen (out0, in0, in1, out1));
29101 return true;
29102 }
29103
29104 /* Recognize patterns for the VREV insns. */
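/* For example, on a single V8QImode input the selector
   { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) matches VREV32.8 and
   { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7) matches VREV64.8.  */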
29105
29106 static bool
29107 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29108 {
29109 unsigned int i, j, diff, nelt = d->perm.length ();
29110 rtx (*gen)(rtx, rtx);
29111
29112 if (!d->one_vector_p)
29113 return false;
29114
29115 diff = d->perm[0];
29116 switch (diff)
29117 {
29118 case 7:
29119 switch (d->vmode)
29120 {
29121 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29122 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29123 default:
29124 return false;
29125 }
29126 break;
29127 case 3:
29128 switch (d->vmode)
29129 {
29130 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29131 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29132 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29133 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29134 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29135 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29136 default:
29137 return false;
29138 }
29139 break;
29140 case 1:
29141 switch (d->vmode)
29142 {
29143 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29144 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29145 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29146 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29147 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29148 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29149 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29150 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29151 default:
29152 return false;
29153 }
29154 break;
29155 default:
29156 return false;
29157 }
29158
29159 for (i = 0; i < nelt ; i += diff + 1)
29160 for (j = 0; j <= diff; j += 1)
29161 {
29162 /* This is guaranteed to be true, as diff is 7, 3 or 1 and
29163 there are always enough elements in the permutation to
29164 satisfy each access. Getting a vector mask with a value
29165 of diff other than these implies that something went
29166 wrong before we got here. */
29167 gcc_assert (i + j < nelt);
29168 if (d->perm[i + j] != i + diff - j)
29169 return false;
29170 }
29171
29172 /* Success! */
29173 if (d->testing_p)
29174 return true;
29175
29176 emit_insn (gen (d->target, d->op0));
29177 return true;
29178 }
29179
29180 /* Recognize patterns for the VTRN insns. */
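/* For example, for two V4SImode inputs the selector { 0, 4, 2, 6 }
   (or { 1, 5, 3, 7 }) matches a single VTRN on a little-endian target.  */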
29181
29182 static bool
29183 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29184 {
29185 unsigned int i, odd, mask, nelt = d->perm.length ();
29186 rtx out0, out1, in0, in1;
29187 rtx (*gen)(rtx, rtx, rtx, rtx);
29188
29189 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29190 return false;
29191
29192 /* Note that these are little-endian tests. Adjust for big-endian later. */
29193 if (d->perm[0] == 0)
29194 odd = 0;
29195 else if (d->perm[0] == 1)
29196 odd = 1;
29197 else
29198 return false;
29199 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29200
29201 for (i = 0; i < nelt; i += 2)
29202 {
29203 if (d->perm[i] != i + odd)
29204 return false;
29205 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29206 return false;
29207 }
29208
29209 /* Success! */
29210 if (d->testing_p)
29211 return true;
29212
29213 switch (d->vmode)
29214 {
29215 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29216 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29217 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29218 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29219 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29220 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29221 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29222 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29223 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29224 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29225 default:
29226 gcc_unreachable ();
29227 }
29228
29229 in0 = d->op0;
29230 in1 = d->op1;
29231 if (BYTES_BIG_ENDIAN)
29232 {
29233 std::swap (in0, in1);
29234 odd = !odd;
29235 }
29236
29237 out0 = d->target;
29238 out1 = gen_reg_rtx (d->vmode);
29239 if (odd)
29240 std::swap (out0, out1);
29241
29242 emit_insn (gen (out0, in0, in1, out1));
29243 return true;
29244 }
29245
29246 /* Recognize patterns for the VEXT insns. */
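/* For example, for two V8QImode inputs the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 }, whose indexes increase by one starting
   from 3, matches VEXT with an offset of 3.  */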
29247
29248 static bool
29249 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29250 {
29251 unsigned int i, nelt = d->perm.length ();
29252 rtx (*gen) (rtx, rtx, rtx, rtx);
29253 rtx offset;
29254
29255 unsigned int location;
29256
29257 unsigned int next = d->perm[0] + 1;
29258
29259 /* TODO: Handle GCC's numbering of elements for big-endian. */
29260 if (BYTES_BIG_ENDIAN)
29261 return false;
29262
29263 /* Check if the extracted indexes are increasing by one. */
29264 for (i = 1; i < nelt; next++, i++)
29265 {
29266 /* If we hit the most significant element of the 2nd vector in
29267 the previous iteration, no need to test further. */
29268 if (next == 2 * nelt)
29269 return false;
29270
29271 /* If we are operating on only one vector: it could be a
29272 rotation. If there are only two elements of size < 64, let
29273 arm_evpc_neon_vrev catch it. */
29274 if (d->one_vector_p && (next == nelt))
29275 {
29276 if ((nelt == 2) && (d->vmode != V2DImode))
29277 return false;
29278 else
29279 next = 0;
29280 }
29281
29282 if (d->perm[i] != next)
29283 return false;
29284 }
29285
29286 location = d->perm[0];
29287
29288 switch (d->vmode)
29289 {
29290 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29291 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29292 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29293 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29294 case E_V2SImode: gen = gen_neon_vextv2si; break;
29295 case E_V4SImode: gen = gen_neon_vextv4si; break;
29296 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29297 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29298 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29299 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29300 case E_V2DImode: gen = gen_neon_vextv2di; break;
29301 default:
29302 return false;
29303 }
29304
29305 /* Success! */
29306 if (d->testing_p)
29307 return true;
29308
29309 offset = GEN_INT (location);
29310 emit_insn (gen (d->target, d->op0, d->op1, offset));
29311 return true;
29312 }
29313
29314 /* The NEON VTBL instruction is a fully variable permutation that's even
29315 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29316 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29317 can do slightly better by expanding this as a constant where we don't
29318 have to apply a mask. */
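/* For example, a V8QImode broadcast of lane 3, i.e. the selector
   { 3, 3, 3, 3, 3, 3, 3, 3 }, is handled here by loading the selector
   into a register and issuing a single VTBL; no masking is needed
   because the constant indexes are already in range.  */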
29319
29320 static bool
29321 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29322 {
29323 rtx rperm[MAX_VECT_LEN], sel;
29324 machine_mode vmode = d->vmode;
29325 unsigned int i, nelt = d->perm.length ();
29326
29327 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29328 numbering of elements for big-endian, we must reverse the order. */
29329 if (BYTES_BIG_ENDIAN)
29330 return false;
29331
29332 if (d->testing_p)
29333 return true;
29334
29335 /* Generic code will try constant permutation twice: once with the
29336 original mode and again with the elements lowered to QImode.
29337 So wait and don't do the selector expansion ourselves. */
29338 if (vmode != V8QImode && vmode != V16QImode)
29339 return false;
29340
29341 for (i = 0; i < nelt; ++i)
29342 rperm[i] = GEN_INT (d->perm[i]);
29343 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29344 sel = force_reg (vmode, sel);
29345
29346 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29347 return true;
29348 }
29349
29350 static bool
29351 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29352 {
29353 /* Check if the input mask matches vext before reordering the
29354 operands. */
29355 if (TARGET_NEON)
29356 if (arm_evpc_neon_vext (d))
29357 return true;
29358
29359 /* The pattern matching functions above are written to look for a small
29360 number to begin the sequence (0, 1, N/2). If we begin with an index
29361 from the second operand, we can swap the operands. */
29362 unsigned int nelt = d->perm.length ();
29363 if (d->perm[0] >= nelt)
29364 {
29365 for (unsigned int i = 0; i < nelt; ++i)
29366 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29367
29368 std::swap (d->op0, d->op1);
29369 }
29370
29371 if (TARGET_NEON)
29372 {
29373 if (arm_evpc_neon_vuzp (d))
29374 return true;
29375 if (arm_evpc_neon_vzip (d))
29376 return true;
29377 if (arm_evpc_neon_vrev (d))
29378 return true;
29379 if (arm_evpc_neon_vtrn (d))
29380 return true;
29381 return arm_evpc_neon_vtbl (d);
29382 }
29383 return false;
29384 }
29385
29386 /* Expand a vec_perm_const pattern. */
29387
29388 bool
29389 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29390 {
29391 struct expand_vec_perm_d d;
29392 int i, nelt, which;
29393
29394 d.target = target;
29395 d.op0 = op0;
29396 d.op1 = op1;
29397
29398 d.vmode = GET_MODE (target);
29399 gcc_assert (VECTOR_MODE_P (d.vmode));
29400 d.testing_p = false;
29401
29402 nelt = GET_MODE_NUNITS (d.vmode);
29403 d.perm.reserve (nelt);
29404 for (i = which = 0; i < nelt; ++i)
29405 {
29406 rtx e = XVECEXP (sel, 0, i);
29407 int ei = INTVAL (e) & (2 * nelt - 1);
29408 which |= (ei < nelt ? 1 : 2);
29409 d.perm.quick_push (ei);
29410 }
29411
29412 switch (which)
29413 {
29414 default:
29415 gcc_unreachable ();
29416
29417 case 3:
29418 d.one_vector_p = false;
29419 if (!rtx_equal_p (op0, op1))
29420 break;
29421
29422 /* The elements of PERM do not suggest that only the first operand
29423 is used, but both operands are identical. Allow easier matching
29424 of the permutation by folding the permutation into the single
29425 input vector. */
29426 /* FALLTHRU */
29427 case 2:
29428 for (i = 0; i < nelt; ++i)
29429 d.perm[i] &= nelt - 1;
29430 d.op0 = op1;
29431 d.one_vector_p = true;
29432 break;
29433
29434 case 1:
29435 d.op1 = op0;
29436 d.one_vector_p = true;
29437 break;
29438 }
29439
29440 return arm_expand_vec_perm_const_1 (&d);
29441 }
29442
29443 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29444
29445 static bool
29446 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29447 {
29448 struct expand_vec_perm_d d;
29449 unsigned int i, nelt, which;
29450 bool ret;
29451
29452 d.vmode = vmode;
29453 d.testing_p = true;
29454 d.perm.safe_splice (sel);
29455
29456 /* Categorize the set of elements in the selector. */
29457 nelt = GET_MODE_NUNITS (d.vmode);
29458 for (i = which = 0; i < nelt; ++i)
29459 {
29460 unsigned int e = d.perm[i];
29461 gcc_assert (e < 2 * nelt);
29462 which |= (e < nelt ? 1 : 2);
29463 }
29464
29465 /* If all elements are from the second vector, fold them onto the first. */
29466 if (which == 2)
29467 for (i = 0; i < nelt; ++i)
29468 d.perm[i] -= nelt;
29469
29470 /* Check whether the mask can be applied to the vector type. */
29471 d.one_vector_p = (which != 3);
29472
29473 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29474 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29475 if (!d.one_vector_p)
29476 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29477
29478 start_sequence ();
29479 ret = arm_expand_vec_perm_const_1 (&d);
29480 end_sequence ();
29481
29482 return ret;
29483 }
29484
29485 bool
29486 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29487 {
29488 /* If we are soft float and either have ldrd or the mode
29489 fits in a single word, then all auto increment forms are ok. */
29490 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29491 return true;
29492
29493 switch (code)
29494 {
29495 /* Post-increment and pre-decrement are supported for all
29496 instruction forms except for vector forms. */
29497 case ARM_POST_INC:
29498 case ARM_PRE_DEC:
29499 if (VECTOR_MODE_P (mode))
29500 {
29501 if (code != ARM_PRE_DEC)
29502 return true;
29503 else
29504 return false;
29505 }
29506
29507 return true;
29508
29509 case ARM_POST_DEC:
29510 case ARM_PRE_INC:
29511 /* Without LDRD and mode size greater than
29512 word size, there is no point in auto-incrementing
29513 because ldm and stm will not have these forms. */
29514 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29515 return false;
29516
29517 /* Vector and floating point modes do not support
29518 these auto increment forms. */
29519 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29520 return false;
29521
29522 return true;
29523
29524 default:
29525 return false;
29526
29527 }
29528
29529 return false;
29530 }
29531
29532 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29533 on ARM, since we know that shifts by negative amounts are no-ops.
29534 Additionally, the default expansion code is not available or suitable
29535 for post-reload insn splits (this can occur when the register allocator
29536 chooses not to do a shift in NEON).
29537
29538 This function is used in both initial expand and post-reload splits, and
29539 handles all kinds of 64-bit shifts.
29540
29541 Input requirements:
29542 - It is safe for the input and output to be the same register, but
29543 early-clobber rules apply for the shift amount and scratch registers.
29544 - Shift by register requires both scratch registers. In all other cases
29545 the scratch registers may be NULL.
29546 - Ashiftrt by a register also clobbers the CC register. */
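/* For illustration (a sketch of the constant case handled below, not
   generated output): a 64-bit left shift by a constant n with 0 < n < 32
   decomposes into the SImode operations

     hi_out = (hi_in << n) | ((unsigned) lo_in >> (32 - n));
     lo_out = lo_in << n;

   while a shift by 32 <= n < 64 needs only a single shift of the low
   (resp. high) input word plus a zero or sign fill of the other half.  */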
29547 void
29548 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29549 rtx amount, rtx scratch1, rtx scratch2)
29550 {
29551 rtx out_high = gen_highpart (SImode, out);
29552 rtx out_low = gen_lowpart (SImode, out);
29553 rtx in_high = gen_highpart (SImode, in);
29554 rtx in_low = gen_lowpart (SImode, in);
29555
29556 /* Terminology:
29557 in = the register pair containing the input value.
29558 out = the destination register pair.
29559 up = the high- or low-part of each pair.
29560 down = the opposite part to "up".
29561 In a shift, we can consider bits to shift from "up"-stream to
29562 "down"-stream, so in a left-shift "up" is the low-part and "down"
29563 is the high-part of each register pair. */
29564
29565 rtx out_up = code == ASHIFT ? out_low : out_high;
29566 rtx out_down = code == ASHIFT ? out_high : out_low;
29567 rtx in_up = code == ASHIFT ? in_low : in_high;
29568 rtx in_down = code == ASHIFT ? in_high : in_low;
29569
29570 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29571 gcc_assert (out
29572 && (REG_P (out) || GET_CODE (out) == SUBREG)
29573 && GET_MODE (out) == DImode);
29574 gcc_assert (in
29575 && (REG_P (in) || GET_CODE (in) == SUBREG)
29576 && GET_MODE (in) == DImode);
29577 gcc_assert (amount
29578 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29579 && GET_MODE (amount) == SImode)
29580 || CONST_INT_P (amount)));
29581 gcc_assert (scratch1 == NULL
29582 || (GET_CODE (scratch1) == SCRATCH)
29583 || (GET_MODE (scratch1) == SImode
29584 && REG_P (scratch1)));
29585 gcc_assert (scratch2 == NULL
29586 || (GET_CODE (scratch2) == SCRATCH)
29587 || (GET_MODE (scratch2) == SImode
29588 && REG_P (scratch2)));
29589 gcc_assert (!REG_P (out) || !REG_P (amount)
29590 || !HARD_REGISTER_P (out)
29591 || (REGNO (out) != REGNO (amount)
29592 && REGNO (out) + 1 != REGNO (amount)));
29593
29594 /* Macros to make following code more readable. */
29595 #define SUB_32(DEST,SRC) \
29596 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29597 #define RSB_32(DEST,SRC) \
29598 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29599 #define SUB_S_32(DEST,SRC) \
29600 gen_addsi3_compare0 ((DEST), (SRC), \
29601 GEN_INT (-32))
29602 #define SET(DEST,SRC) \
29603 gen_rtx_SET ((DEST), (SRC))
29604 #define SHIFT(CODE,SRC,AMOUNT) \
29605 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29606 #define LSHIFT(CODE,SRC,AMOUNT) \
29607 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29608 SImode, (SRC), (AMOUNT))
29609 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29610 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29611 SImode, (SRC), (AMOUNT))
29612 #define ORR(A,B) \
29613 gen_rtx_IOR (SImode, (A), (B))
29614 #define BRANCH(COND,LABEL) \
29615 gen_arm_cond_branch ((LABEL), \
29616 gen_rtx_ ## COND (CCmode, cc_reg, \
29617 const0_rtx), \
29618 cc_reg)
29619
29620 /* Shifts by register and shifts by constant are handled separately. */
29621 if (CONST_INT_P (amount))
29622 {
29623 /* We have a shift-by-constant. */
29624
29625 /* First, handle out-of-range shift amounts.
29626 In both cases we try to match the result that an ARM instruction in a
29627 shift-by-register would give. This helps reduce execution
29628 differences between optimization levels, but it won't stop other
29629 parts of the compiler doing different things. This is "undefined
29630 behavior", in any case. */
29631 if (INTVAL (amount) <= 0)
29632 emit_insn (gen_movdi (out, in));
29633 else if (INTVAL (amount) >= 64)
29634 {
29635 if (code == ASHIFTRT)
29636 {
29637 rtx const31_rtx = GEN_INT (31);
29638 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29639 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29640 }
29641 else
29642 emit_insn (gen_movdi (out, const0_rtx));
29643 }
29644
29645 /* Now handle valid shifts. */
29646 else if (INTVAL (amount) < 32)
29647 {
29648 /* Shifts by a constant less than 32. */
29649 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29650
29651 /* Clearing the out register in DImode first avoids lots
29652 of spilling and results in less stack usage.
29653 Later this redundant insn is completely removed.
29654 Do that only if "in" and "out" are different registers. */
29655 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29656 emit_insn (SET (out, const0_rtx));
29657 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29658 emit_insn (SET (out_down,
29659 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29660 out_down)));
29661 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29662 }
29663 else
29664 {
29665 /* Shifts by a constant greater than 31. */
29666 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29667
29668 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29669 emit_insn (SET (out, const0_rtx));
29670 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29671 if (code == ASHIFTRT)
29672 emit_insn (gen_ashrsi3 (out_up, in_up,
29673 GEN_INT (31)));
29674 else
29675 emit_insn (SET (out_up, const0_rtx));
29676 }
29677 }
29678 else
29679 {
29680 /* We have a shift-by-register. */
29681 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29682
29683 /* This alternative requires the scratch registers. */
29684 gcc_assert (scratch1 && REG_P (scratch1));
29685 gcc_assert (scratch2 && REG_P (scratch2));
29686
29687 /* We will need the values "amount-32" and "32-amount" later.
29688 Swapping them around now allows the later code to be more general. */
29689 switch (code)
29690 {
29691 case ASHIFT:
29692 emit_insn (SUB_32 (scratch1, amount));
29693 emit_insn (RSB_32 (scratch2, amount));
29694 break;
29695 case ASHIFTRT:
29696 emit_insn (RSB_32 (scratch1, amount));
29697 /* Also set CC so the branch below can test amount < 32. */
29698 emit_insn (SUB_S_32 (scratch2, amount));
29699 break;
29700 case LSHIFTRT:
29701 emit_insn (RSB_32 (scratch1, amount));
29702 emit_insn (SUB_32 (scratch2, amount));
29703 break;
29704 default:
29705 gcc_unreachable ();
29706 }
29707
29708 /* Emit code like this:
29709
29710 arithmetic-left:
29711 out_down = in_down << amount;
29712 out_down = (in_up << (amount - 32)) | out_down;
29713 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29714 out_up = in_up << amount;
29715
29716 arithmetic-right:
29717 out_down = in_down >> amount;
29718 out_down = (in_up << (32 - amount)) | out_down;
29719 if (amount >= 32)
29720 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29721 out_up = in_up >> amount;
29722
29723 logical-right:
29724 out_down = in_down >> amount;
29725 out_down = (in_up << (32 - amount)) | out_down;
29726 if (amount >= 32)
29727 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29728 out_up = in_up >> amount;
29729
29730 The ARM and Thumb2 variants are the same but implemented slightly
29731 differently. If this were only called during expand we could just
29732 use the Thumb2 case and let combine do the right thing, but this
29733 can also be called from post-reload splitters. */
29734
29735 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29736
29737 if (!TARGET_THUMB2)
29738 {
29739 /* Emit code for ARM mode. */
29740 emit_insn (SET (out_down,
29741 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29742 if (code == ASHIFTRT)
29743 {
29744 rtx_code_label *done_label = gen_label_rtx ();
29745 emit_jump_insn (BRANCH (LT, done_label));
29746 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29747 out_down)));
29748 emit_label (done_label);
29749 }
29750 else
29751 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29752 out_down)));
29753 }
29754 else
29755 {
29756 /* Emit code for Thumb2 mode.
29757 Thumb2 can't do shift and or in one insn. */
29758 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29759 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29760
29761 if (code == ASHIFTRT)
29762 {
29763 rtx_code_label *done_label = gen_label_rtx ();
29764 emit_jump_insn (BRANCH (LT, done_label));
29765 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29766 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29767 emit_label (done_label);
29768 }
29769 else
29770 {
29771 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29772 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29773 }
29774 }
29775
29776 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29777 }
29778
29779 #undef SUB_32
29780 #undef RSB_32
29781 #undef SUB_S_32
29782 #undef SET
29783 #undef SHIFT
29784 #undef LSHIFT
29785 #undef REV_LSHIFT
29786 #undef ORR
29787 #undef BRANCH
29788 }
29789
29790 /* Returns true if the pattern is a valid symbolic address, which is either a
29791 symbol_ref or (symbol_ref + addend).
29792
29793 According to the ARM ELF ABI, the initial addend of REL-type relocations
29794 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29795 literal field of the instruction as a 16-bit signed value in the range
29796 -32768 <= A < 32768. */
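/* For example, both (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 4))) are accepted, whereas
   (const (plus (symbol_ref "foo") (const_int 0x12345))) is rejected
   because the addend does not fit the signed 16-bit range; "foo" is of
   course just an illustrative name.  */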
29797
29798 bool
29799 arm_valid_symbolic_address_p (rtx addr)
29800 {
29801 rtx xop0, xop1 = NULL_RTX;
29802 rtx tmp = addr;
29803
29804 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29805 return true;
29806
29807 /* (const (plus: symbol_ref const_int)) */
29808 if (GET_CODE (addr) == CONST)
29809 tmp = XEXP (addr, 0);
29810
29811 if (GET_CODE (tmp) == PLUS)
29812 {
29813 xop0 = XEXP (tmp, 0);
29814 xop1 = XEXP (tmp, 1);
29815
29816 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29817 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29818 }
29819
29820 return false;
29821 }
29822
29823 /* Returns true if *COMPARISON is a valid comparison operation, and
29824 puts the operands into a form that is valid for it. */
29825 bool
29826 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29827 {
29828 enum rtx_code code = GET_CODE (*comparison);
29829 int code_int;
29830 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29831 ? GET_MODE (*op2) : GET_MODE (*op1);
29832
29833 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29834
29835 if (code == UNEQ || code == LTGT)
29836 return false;
29837
29838 code_int = (int)code;
29839 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29840 PUT_CODE (*comparison, (enum rtx_code)code_int);
29841
29842 switch (mode)
29843 {
29844 case E_SImode:
29845 if (!arm_add_operand (*op1, mode))
29846 *op1 = force_reg (mode, *op1);
29847 if (!arm_add_operand (*op2, mode))
29848 *op2 = force_reg (mode, *op2);
29849 return true;
29850
29851 case E_DImode:
29852 if (!cmpdi_operand (*op1, mode))
29853 *op1 = force_reg (mode, *op1);
29854 if (!cmpdi_operand (*op2, mode))
29855 *op2 = force_reg (mode, *op2);
29856 return true;
29857
29858 case E_HFmode:
29859 if (!TARGET_VFP_FP16INST)
29860 break;
29861 /* FP16 comparisons are done in SF mode. */
29862 mode = SFmode;
29863 *op1 = convert_to_mode (mode, *op1, 1);
29864 *op2 = convert_to_mode (mode, *op2, 1);
29865 /* Fall through. */
29866 case E_SFmode:
29867 case E_DFmode:
29868 if (!vfp_compare_operand (*op1, mode))
29869 *op1 = force_reg (mode, *op1);
29870 if (!vfp_compare_operand (*op2, mode))
29871 *op2 = force_reg (mode, *op2);
29872 return true;
29873 default:
29874 break;
29875 }
29876
29877 return false;
29878
29879 }
29880
29881 /* Maximum number of instructions to set a block of memory. */
29882 static int
29883 arm_block_set_max_insns (void)
29884 {
29885 if (optimize_function_for_size_p (cfun))
29886 return 4;
29887 else
29888 return current_tune->max_insns_inline_memset;
29889 }
29890
29891 /* Return TRUE if it's profitable to set a block of memory for the
29892 non-vectorized case. VAL is the value to set the memory
29893 with. LENGTH is the number of bytes to set. ALIGN is the
29894 alignment of the destination memory in bytes. UNALIGNED_P
29895 is TRUE if we can only set the memory with instructions
29896 meeting alignment requirements. USE_STRD_P is TRUE if we
29897 can use strd to set the memory. */
29898 static bool
29899 arm_block_set_non_vect_profit_p (rtx val,
29900 unsigned HOST_WIDE_INT length,
29901 unsigned HOST_WIDE_INT align,
29902 bool unaligned_p, bool use_strd_p)
29903 {
29904 int num = 0;
29905 /* For a leftover of 0-7 bytes, we can set the memory block using
29906 strb/strh/str with the minimum number of instructions. */
29907 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29908
29909 if (unaligned_p)
29910 {
29911 num = arm_const_inline_cost (SET, val);
29912 num += length / align + length % align;
29913 }
29914 else if (use_strd_p)
29915 {
29916 num = arm_const_double_inline_cost (val);
29917 num += (length >> 3) + leftover[length & 7];
29918 }
29919 else
29920 {
29921 num = arm_const_inline_cost (SET, val);
29922 num += (length >> 2) + leftover[length & 3];
29923 }
29924
29925 /* We may be able to combine last pair STRH/STRB into a single STR
29926 by shifting one byte back. */
29927 if (unaligned_access && length > 3 && (length & 3) == 3)
29928 num--;
29929
29930 return (num <= arm_block_set_max_insns ());
29931 }
29932
29933 /* Return TRUE if it's profitable to set a block of memory for the
29934 vectorized case. LENGTH is the number of bytes to set.
29935 ALIGN is the alignment of destination memory in bytes.
29936 MODE is the vector mode used to set the memory. */
29937 static bool
29938 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29939 unsigned HOST_WIDE_INT align,
29940 machine_mode mode)
29941 {
29942 int num;
29943 bool unaligned_p = ((align & 3) != 0);
29944 unsigned int nelt = GET_MODE_NUNITS (mode);
29945
29946 /* Instruction loading constant value. */
29947 num = 1;
29948 /* Instructions storing the memory. */
29949 num += (length + nelt - 1) / nelt;
29950 /* Instructions adjusting the address expression. We only need to adjust
29951 the address expression if the destination is 4-byte aligned and the
29952 leftover bytes can only be stored by a misaligned store instruction. */
29953 if (!unaligned_p && (length & 3) != 0)
29954 num++;
29955
29956 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29957 if (!unaligned_p && mode == V16QImode)
29958 num--;
29959
29960 return (num <= arm_block_set_max_insns ());
29961 }
29962
29963 /* Set a block of memory using vectorization instructions for the
29964 unaligned case. We fill the first LENGTH bytes of the memory
29965 area starting from DSTBASE with byte constant VALUE. ALIGN is
29966 the alignment requirement of memory. Return TRUE if succeeded. */
29967 static bool
29968 arm_block_set_unaligned_vect (rtx dstbase,
29969 unsigned HOST_WIDE_INT length,
29970 unsigned HOST_WIDE_INT value,
29971 unsigned HOST_WIDE_INT align)
29972 {
29973 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29974 rtx dst, mem;
29975 rtx val_vec, reg;
29976 rtx (*gen_func) (rtx, rtx);
29977 machine_mode mode;
29978 unsigned HOST_WIDE_INT v = value;
29979 unsigned int offset = 0;
29980 gcc_assert ((align & 0x3) != 0);
29981 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29982 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29983 if (length >= nelt_v16)
29984 {
29985 mode = V16QImode;
29986 gen_func = gen_movmisalignv16qi;
29987 }
29988 else
29989 {
29990 mode = V8QImode;
29991 gen_func = gen_movmisalignv8qi;
29992 }
29993 nelt_mode = GET_MODE_NUNITS (mode);
29994 gcc_assert (length >= nelt_mode);
29995 /* Skip if it isn't profitable. */
29996 if (!arm_block_set_vect_profit_p (length, align, mode))
29997 return false;
29998
29999 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30000 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30001
30002 v = sext_hwi (v, BITS_PER_WORD);
30003
30004 reg = gen_reg_rtx (mode);
30005 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30006 /* Emit instruction loading the constant value. */
30007 emit_move_insn (reg, val_vec);
30008
30009 /* Handle nelt_mode bytes in a vector. */
30010 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30011 {
30012 emit_insn ((*gen_func) (mem, reg));
30013 if (i + 2 * nelt_mode <= length)
30014 {
30015 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30016 offset += nelt_mode;
30017 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30018 }
30019 }
30020
30021 /* If at least nelt_v8 bytes are left over, we must be in
30022 V16QI mode. */
30023 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30024
30025 /* Handle (8, 16) bytes leftover. */
30026 if (i + nelt_v8 < length)
30027 {
30028 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30029 offset += length - i;
30030 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30031
30032 /* We are shifting bytes back, set the alignment accordingly. */
30033 if ((length & 1) != 0 && align >= 2)
30034 set_mem_align (mem, BITS_PER_UNIT);
30035
30036 emit_insn (gen_movmisalignv16qi (mem, reg));
30037 }
30038 /* Handle (0, 8] bytes leftover. */
30039 else if (i < length && i + nelt_v8 >= length)
30040 {
30041 if (mode == V16QImode)
30042 reg = gen_lowpart (V8QImode, reg);
30043
30044 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30045 + (nelt_mode - nelt_v8))));
30046 offset += (length - i) + (nelt_mode - nelt_v8);
30047 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30048
30049 /* We are shifting bytes back, set the alignment accordingly. */
30050 if ((length & 1) != 0 && align >= 2)
30051 set_mem_align (mem, BITS_PER_UNIT);
30052
30053 emit_insn (gen_movmisalignv8qi (mem, reg));
30054 }
30055
30056 return true;
30057 }
30058
30059 /* Set a block of memory using vectorization instructions for the
30060 aligned case. We fill the first LENGTH bytes of the memory area
30061 starting from DSTBASE with byte constant VALUE. ALIGN is the
30062 alignment requirement of memory. Return TRUE if succeeded. */
30063 static bool
30064 arm_block_set_aligned_vect (rtx dstbase,
30065 unsigned HOST_WIDE_INT length,
30066 unsigned HOST_WIDE_INT value,
30067 unsigned HOST_WIDE_INT align)
30068 {
30069 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30070 rtx dst, addr, mem;
30071 rtx val_vec, reg;
30072 machine_mode mode;
30073 unsigned HOST_WIDE_INT v = value;
30074 unsigned int offset = 0;
30075
30076 gcc_assert ((align & 0x3) == 0);
30077 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30078 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30079 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30080 mode = V16QImode;
30081 else
30082 mode = V8QImode;
30083
30084 nelt_mode = GET_MODE_NUNITS (mode);
30085 gcc_assert (length >= nelt_mode);
30086 /* Skip if it isn't profitable. */
30087 if (!arm_block_set_vect_profit_p (length, align, mode))
30088 return false;
30089
30090 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30091
30092 v = sext_hwi (v, BITS_PER_WORD);
30093
30094 reg = gen_reg_rtx (mode);
30095 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30096 /* Emit instruction loading the constant value. */
30097 emit_move_insn (reg, val_vec);
30098
30099 i = 0;
30100 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30101 if (mode == V16QImode)
30102 {
30103 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30104 emit_insn (gen_movmisalignv16qi (mem, reg));
30105 i += nelt_mode;
30106 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30107 if (i + nelt_v8 < length && i + nelt_v16 > length)
30108 {
30109 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30110 offset += length - nelt_mode;
30111 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30112 /* We are shifting bytes back, set the alignment accordingly. */
30113 if ((length & 0x3) == 0)
30114 set_mem_align (mem, BITS_PER_UNIT * 4);
30115 else if ((length & 0x1) == 0)
30116 set_mem_align (mem, BITS_PER_UNIT * 2);
30117 else
30118 set_mem_align (mem, BITS_PER_UNIT);
30119
30120 emit_insn (gen_movmisalignv16qi (mem, reg));
30121 return true;
30122 }
30123 /* Fall through for bytes leftover. */
30124 mode = V8QImode;
30125 nelt_mode = GET_MODE_NUNITS (mode);
30126 reg = gen_lowpart (V8QImode, reg);
30127 }
30128
30129 /* Handle 8 bytes in a vector. */
30130 for (; (i + nelt_mode <= length); i += nelt_mode)
30131 {
30132 addr = plus_constant (Pmode, dst, i);
30133 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30134 emit_move_insn (mem, reg);
30135 }
30136
30137 /* Handle single word leftover by shifting 4 bytes back. We can
30138 use aligned access for this case. */
30139 if (i + UNITS_PER_WORD == length)
30140 {
30141 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30142 offset += i - UNITS_PER_WORD;
30143 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30144 /* We are shifting 4 bytes back, set the alignment accordingly. */
30145 if (align > UNITS_PER_WORD)
30146 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30147
30148 emit_move_insn (mem, reg);
30149 }
30150 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30151 We have to use unaligned access for this case. */
30152 else if (i < length)
30153 {
30154 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30155 offset += length - nelt_mode;
30156 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30157 /* We are shifting bytes back, set the alignment accordingly. */
30158 if ((length & 1) == 0)
30159 set_mem_align (mem, BITS_PER_UNIT * 2);
30160 else
30161 set_mem_align (mem, BITS_PER_UNIT);
30162
30163 emit_insn (gen_movmisalignv8qi (mem, reg));
30164 }
30165
30166 return true;
30167 }
30168
30169 /* Set a block of memory using plain strh/strb instructions, only
30170 using instructions allowed by ALIGN on the processor. We fill the
30171 first LENGTH bytes of the memory area starting from DSTBASE
30172 with byte constant VALUE. ALIGN is the alignment requirement
30173 of memory. */
30174 static bool
30175 arm_block_set_unaligned_non_vect (rtx dstbase,
30176 unsigned HOST_WIDE_INT length,
30177 unsigned HOST_WIDE_INT value,
30178 unsigned HOST_WIDE_INT align)
30179 {
30180 unsigned int i;
30181 rtx dst, addr, mem;
30182 rtx val_exp, val_reg, reg;
30183 machine_mode mode;
30184 HOST_WIDE_INT v = value;
30185
30186 gcc_assert (align == 1 || align == 2);
30187
30188 if (align == 2)
30189 v |= (value << BITS_PER_UNIT);
30190
30191 v = sext_hwi (v, BITS_PER_WORD);
30192 val_exp = GEN_INT (v);
30193 /* Skip if it isn't profitable. */
30194 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30195 align, true, false))
30196 return false;
30197
30198 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30199 mode = (align == 2 ? HImode : QImode);
30200 val_reg = force_reg (SImode, val_exp);
30201 reg = gen_lowpart (mode, val_reg);
30202
30203 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30204 {
30205 addr = plus_constant (Pmode, dst, i);
30206 mem = adjust_automodify_address (dstbase, mode, addr, i);
30207 emit_move_insn (mem, reg);
30208 }
30209
30210 /* Handle single byte leftover. */
30211 if (i + 1 == length)
30212 {
30213 reg = gen_lowpart (QImode, val_reg);
30214 addr = plus_constant (Pmode, dst, i);
30215 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30216 emit_move_insn (mem, reg);
30217 i++;
30218 }
30219
30220 gcc_assert (i == length);
30221 return true;
30222 }
30223
30224 /* Set a block of memory using plain strd/str/strh/strb instructions,
30225 to permit unaligned stores on processors which support unaligned
30226 semantics for those instructions. We fill the first LENGTH bytes
30227 of the memory area starting from DSTBASE with byte constant VALUE.
30228 ALIGN is the alignment requirement of memory. */
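/* For illustration (values chosen only as an example): setting 25 bytes
   to 0xAB on a word-aligned destination replicates the byte into
   0xABABABAB (and into the 64-bit 0xABABABABABABABAB when strd is used),
   stores double-words or words for the bulk, and finishes the tail with
   strh/strb or a single overlapping unaligned str, as the code below
   decides.  */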
30229 static bool
30230 arm_block_set_aligned_non_vect (rtx dstbase,
30231 unsigned HOST_WIDE_INT length,
30232 unsigned HOST_WIDE_INT value,
30233 unsigned HOST_WIDE_INT align)
30234 {
30235 unsigned int i;
30236 rtx dst, addr, mem;
30237 rtx val_exp, val_reg, reg;
30238 unsigned HOST_WIDE_INT v;
30239 bool use_strd_p;
30240
30241 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30242 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30243
30244 v = (value | (value << 8) | (value << 16) | (value << 24));
30245 if (length < UNITS_PER_WORD)
30246 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30247
30248 if (use_strd_p)
30249 v |= (v << BITS_PER_WORD);
30250 else
30251 v = sext_hwi (v, BITS_PER_WORD);
30252
30253 val_exp = GEN_INT (v);
30254 /* Skip if it isn't profitable. */
30255 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30256 align, false, use_strd_p))
30257 {
30258 if (!use_strd_p)
30259 return false;
30260
30261 /* Try without strd. */
30262 v = (v >> BITS_PER_WORD);
30263 v = sext_hwi (v, BITS_PER_WORD);
30264 val_exp = GEN_INT (v);
30265 use_strd_p = false;
30266 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30267 align, false, use_strd_p))
30268 return false;
30269 }
30270
30271 i = 0;
30272 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30273 /* Handle double words using strd if possible. */
30274 if (use_strd_p)
30275 {
30276 val_reg = force_reg (DImode, val_exp);
30277 reg = val_reg;
30278 for (; (i + 8 <= length); i += 8)
30279 {
30280 addr = plus_constant (Pmode, dst, i);
30281 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30282 emit_move_insn (mem, reg);
30283 }
30284 }
30285 else
30286 val_reg = force_reg (SImode, val_exp);
30287
30288 /* Handle words. */
30289 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30290 for (; (i + 4 <= length); i += 4)
30291 {
30292 addr = plus_constant (Pmode, dst, i);
30293 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30294 if ((align & 3) == 0)
30295 emit_move_insn (mem, reg);
30296 else
30297 emit_insn (gen_unaligned_storesi (mem, reg));
30298 }
30299
30300 /* Merge last pair of STRH and STRB into a STR if possible. */
30301 if (unaligned_access && i > 0 && (i + 3) == length)
30302 {
30303 addr = plus_constant (Pmode, dst, i - 1);
30304 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30305 /* We are shifting one byte back, set the alignment accordingly. */
30306 if ((align & 1) == 0)
30307 set_mem_align (mem, BITS_PER_UNIT);
30308
30309 /* Most likely this is an unaligned access, and we can't tell at
30310 compilation time. */
30311 emit_insn (gen_unaligned_storesi (mem, reg));
30312 return true;
30313 }
30314
30315 /* Handle half word leftover. */
30316 if (i + 2 <= length)
30317 {
30318 reg = gen_lowpart (HImode, val_reg);
30319 addr = plus_constant (Pmode, dst, i);
30320 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30321 if ((align & 1) == 0)
30322 emit_move_insn (mem, reg);
30323 else
30324 emit_insn (gen_unaligned_storehi (mem, reg));
30325
30326 i += 2;
30327 }
30328
30329 /* Handle single byte leftover. */
30330 if (i + 1 == length)
30331 {
30332 reg = gen_lowpart (QImode, val_reg);
30333 addr = plus_constant (Pmode, dst, i);
30334 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30335 emit_move_insn (mem, reg);
30336 }
30337
30338 return true;
30339 }
30340
30341 /* Set a block of memory using vectorization instructions for both
30342 aligned and unaligned cases. We fill the first LENGTH bytes of
30343 the memory area starting from DSTBASE with byte constant VALUE.
30344 ALIGN is the alignment requirement of memory. */
30345 static bool
30346 arm_block_set_vect (rtx dstbase,
30347 unsigned HOST_WIDE_INT length,
30348 unsigned HOST_WIDE_INT value,
30349 unsigned HOST_WIDE_INT align)
30350 {
30351 /* Check whether we need to use unaligned store instruction. */
30352 if (((align & 3) != 0 || (length & 3) != 0)
30353 /* Check whether unaligned store instruction is available. */
30354 && (!unaligned_access || BYTES_BIG_ENDIAN))
30355 return false;
30356
30357 if ((align & 3) == 0)
30358 return arm_block_set_aligned_vect (dstbase, length, value, align);
30359 else
30360 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30361 }
30362
30363 /* Expand a string store (memory set) operation. First we try to do it
30364 using vectorization instructions, then with ARM unaligned access and
30365 double-word stores if profitable. OPERANDS[0] is the destination,
30366 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30367 initialize the memory, OPERANDS[3] is the known alignment of the
30368 destination. */
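/* For illustration (not a guarantee of the chosen sequence): a call such
   as memset (buf, 0, 32) with a suitably aligned BUF on a NEON target
   whose tuning prefers string ops can be expanded inline here, whereas
   any length above 64 bytes makes this expander give up and leaves the
   job to the generic code.  */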
30369 bool
30370 arm_gen_setmem (rtx *operands)
30371 {
30372 rtx dstbase = operands[0];
30373 unsigned HOST_WIDE_INT length;
30374 unsigned HOST_WIDE_INT value;
30375 unsigned HOST_WIDE_INT align;
30376
30377 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30378 return false;
30379
30380 length = UINTVAL (operands[1]);
30381 if (length > 64)
30382 return false;
30383
30384 value = (UINTVAL (operands[2]) & 0xFF);
30385 align = UINTVAL (operands[3]);
30386 if (TARGET_NEON && length >= 8
30387 && current_tune->string_ops_prefer_neon
30388 && arm_block_set_vect (dstbase, length, value, align))
30389 return true;
30390
30391 if (!unaligned_access && (align & 3) != 0)
30392 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30393
30394 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30395 }
30396
30397
30398 static bool
30399 arm_macro_fusion_p (void)
30400 {
30401 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30402 }
30403
30404 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30405 for MOVW / MOVT macro fusion. */
30406
30407 static bool
30408 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30409 {
30410 /* We are trying to fuse
30411 movw imm / movt imm
30412 instructions as a group that gets scheduled together. */
30413
30414 rtx set_dest = SET_DEST (curr_set);
30415
30416 if (GET_MODE (set_dest) != SImode)
30417 return false;
30418
30419 /* We are trying to match:
30420 prev (movw) == (set (reg r0) (const_int imm16))
30421 curr (movt) == (set (zero_extract (reg r0)
30422 (const_int 16)
30423 (const_int 16))
30424 (const_int imm16_1))
30425 or
30426 prev (movw) == (set (reg r1)
30427 (high (symbol_ref ("SYM"))))
30428 curr (movt) == (set (reg r0)
30429 (lo_sum (reg r1)
30430 (symbol_ref ("SYM")))) */
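/* At the assembly level the fused pair is typically of the form
     movw  rX, #:lower16:SYM    (or  movw  rX, #imm16)
     movt  rX, #:upper16:SYM    (or  movt  rX, #imm16_1)
   which suitable cores can issue or schedule as one unit; the checks
   below only establish that the two sets form such a pair.  */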
30431
30432 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30433 {
30434 if (CONST_INT_P (SET_SRC (curr_set))
30435 && CONST_INT_P (SET_SRC (prev_set))
30436 && REG_P (XEXP (set_dest, 0))
30437 && REG_P (SET_DEST (prev_set))
30438 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30439 return true;
30440
30441 }
30442 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30443 && REG_P (SET_DEST (curr_set))
30444 && REG_P (SET_DEST (prev_set))
30445 && GET_CODE (SET_SRC (prev_set)) == HIGH
30446 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30447 return true;
30448
30449 return false;
30450 }
30451
30452 static bool
30453 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30454 {
30455 rtx prev_set = single_set (prev);
30456 rtx curr_set = single_set (curr);
30457
30458 if (!prev_set
30459 || !curr_set)
30460 return false;
30461
30462 if (any_condjump_p (curr))
30463 return false;
30464
30465 if (!arm_macro_fusion_p ())
30466 return false;
30467
30468 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30469 && aarch_crypto_can_dual_issue (prev, curr))
30470 return true;
30471
30472 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30473 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30474 return true;
30475
30476 return false;
30477 }
30478
30479 /* Return true iff the instruction fusion described by OP is enabled. */
30480 bool
30481 arm_fusion_enabled_p (tune_params::fuse_ops op)
30482 {
30483 return current_tune->fusible_ops & op;
30484 }
30485
30486 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30487 scheduled for speculative execution. Reject the long-running division
30488 and square-root instructions. */
30489
30490 static bool
30491 arm_sched_can_speculate_insn (rtx_insn *insn)
30492 {
30493 switch (get_attr_type (insn))
30494 {
30495 case TYPE_SDIV:
30496 case TYPE_UDIV:
30497 case TYPE_FDIVS:
30498 case TYPE_FDIVD:
30499 case TYPE_FSQRTS:
30500 case TYPE_FSQRTD:
30501 case TYPE_NEON_FP_SQRT_S:
30502 case TYPE_NEON_FP_SQRT_D:
30503 case TYPE_NEON_FP_SQRT_S_Q:
30504 case TYPE_NEON_FP_SQRT_D_Q:
30505 case TYPE_NEON_FP_DIV_S:
30506 case TYPE_NEON_FP_DIV_D:
30507 case TYPE_NEON_FP_DIV_S_Q:
30508 case TYPE_NEON_FP_DIV_D_Q:
30509 return false;
30510 default:
30511 return true;
30512 }
30513 }
30514
30515 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30516
30517 static unsigned HOST_WIDE_INT
30518 arm_asan_shadow_offset (void)
30519 {
30520 return HOST_WIDE_INT_1U << 29;
30521 }
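/* With this offset and GCC's default shadow scale of 3, AddressSanitizer
   maps an address ADDR to shadow memory at roughly
   (ADDR >> 3) + 0x20000000; the value returned above is 1 << 29.  */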
30522
30523
30524 /* This is a temporary fix for PR60655. Ideally we need
30525 to handle most of these cases in the generic part but
30526 currently we reject minus (..) (sym_ref). We try to
30527 ameliorate the case with minus (sym_ref1) (sym_ref2)
30528 where they are in the same section. */
30529
30530 static bool
30531 arm_const_not_ok_for_debug_p (rtx p)
30532 {
30533 tree decl_op0 = NULL;
30534 tree decl_op1 = NULL;
30535
30536 if (GET_CODE (p) == UNSPEC)
30537 return true;
30538 if (GET_CODE (p) == MINUS)
30539 {
30540 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30541 {
30542 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30543 if (decl_op1
30544 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30545 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30546 {
30547 if ((VAR_P (decl_op1)
30548 || TREE_CODE (decl_op1) == CONST_DECL)
30549 && (VAR_P (decl_op0)
30550 || TREE_CODE (decl_op0) == CONST_DECL))
30551 return (get_variable_section (decl_op1, false)
30552 != get_variable_section (decl_op0, false));
30553
30554 if (TREE_CODE (decl_op1) == LABEL_DECL
30555 && TREE_CODE (decl_op0) == LABEL_DECL)
30556 return (DECL_CONTEXT (decl_op1)
30557 != DECL_CONTEXT (decl_op0));
30558 }
30559
30560 return true;
30561 }
30562 }
30563
30564 return false;
30565 }
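/* For example, (minus (symbol_ref "a") (symbol_ref "b")) is acceptable
   for debug output when both decls are variables placed in the same
   section, but is rejected when they end up in different sections or
   when either symbol has no associated decl.  */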
30566
30567 /* Return TRUE if X is a reference to a value in a constant pool.  */
30568 extern bool
30569 arm_is_constant_pool_ref (rtx x)
30570 {
30571 return (MEM_P (x)
30572 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30573 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30574 }
30575
30576 /* Remember the last target of arm_set_current_function. */
30577 static GTY(()) tree arm_previous_fndecl;
30578
30579 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30580
30581 void
30582 save_restore_target_globals (tree new_tree)
30583 {
30584 /* If we have a previous state, use it. */
30585 if (TREE_TARGET_GLOBALS (new_tree))
30586 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30587 else if (new_tree == target_option_default_node)
30588 restore_target_globals (&default_target_globals);
30589 else
30590 {
30591 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30592 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30593 }
30594
30595 arm_option_params_internal ();
30596 }
30597
30598 /* Invalidate arm_previous_fndecl. */
30599
30600 void
30601 arm_reset_previous_fndecl (void)
30602 {
30603 arm_previous_fndecl = NULL_TREE;
30604 }
30605
30606 /* Establish appropriate back-end context for processing the function
30607 FNDECL. The argument might be NULL to indicate processing at top
30608 level, outside of any function scope. */
30609
30610 static void
30611 arm_set_current_function (tree fndecl)
30612 {
30613 if (!fndecl || fndecl == arm_previous_fndecl)
30614 return;
30615
30616 tree old_tree = (arm_previous_fndecl
30617 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30618 : NULL_TREE);
30619
30620 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30621
30622 /* If current function has no attributes but previous one did,
30623 use the default node. */
30624 if (! new_tree && old_tree)
30625 new_tree = target_option_default_node;
30626
30627 /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30628 the default have been handled by save_restore_target_globals from
30629 arm_pragma_target_parse. */
30630 if (old_tree == new_tree)
30631 return;
30632
30633 arm_previous_fndecl = fndecl;
30634
30635 /* First set the target options. */
30636 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30637
30638 save_restore_target_globals (new_tree);
30639 }
30640
30641 /* Implement TARGET_OPTION_PRINT. */
30642
30643 static void
30644 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30645 {
30646 int flags = ptr->x_target_flags;
30647 const char *fpu_name;
30648
30649 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30650 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30651
30652 fprintf (file, "%*sselected isa %s\n", indent, "",
30653 TARGET_THUMB2_P (flags) ? "thumb2" :
30654 TARGET_THUMB_P (flags) ? "thumb1" :
30655 "arm");
30656
30657 if (ptr->x_arm_arch_string)
30658 fprintf (file, "%*sselected architecture %s\n", indent, "",
30659 ptr->x_arm_arch_string);
30660
30661 if (ptr->x_arm_cpu_string)
30662 fprintf (file, "%*sselected CPU %s\n", indent, "",
30663 ptr->x_arm_cpu_string);
30664
30665 if (ptr->x_arm_tune_string)
30666 fprintf (file, "%*sselected tune %s\n", indent, "",
30667 ptr->x_arm_tune_string);
30668
30669 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30670 }
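/* When a saved set of target options is dumped, the output has the form
   (values are examples only):
       selected isa thumb2
       selected architecture armv7-a
       selected fpu neon  */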
30671
30672 /* Hook to determine if one function can safely inline another. */
30673
30674 static bool
30675 arm_can_inline_p (tree caller, tree callee)
30676 {
30677 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30678 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30679 bool can_inline = true;
30680
30681 struct cl_target_option *caller_opts
30682 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30683 : target_option_default_node);
30684
30685 struct cl_target_option *callee_opts
30686 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30687 : target_option_default_node);
30688
30689 if (callee_opts == caller_opts)
30690 return true;
30691
30692 /* Callee's ISA features should be a subset of the caller's. */
30693 struct arm_build_target caller_target;
30694 struct arm_build_target callee_target;
30695 caller_target.isa = sbitmap_alloc (isa_num_bits);
30696 callee_target.isa = sbitmap_alloc (isa_num_bits);
30697
30698 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30699 false);
30700 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30701 false);
30702 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30703 can_inline = false;
30704
30705 sbitmap_free (caller_target.isa);
30706 sbitmap_free (callee_target.isa);
30707
30708 /* OK to inline between different modes.
30709 Functions with mode-specific instructions, e.g. using asm,
30710 must be explicitly protected with noinline. */
30711 return can_inline;
30712 }
30713
30714 /* Hook to fix function's alignment affected by target attribute. */
30715
30716 static void
30717 arm_relayout_function (tree fndecl)
30718 {
30719 if (DECL_USER_ALIGN (fndecl))
30720 return;
30721
30722 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30723
30724 if (!callee_tree)
30725 callee_tree = target_option_default_node;
30726
30727 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30728 SET_DECL_ALIGN
30729 (fndecl,
30730 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30731 }
30732
30733 /* Inner function to process the attribute ((target (...))): take an argument
30734 and set the current options from it.  If we have a list, recursively
30735 process each element of the list. */
30736
30737 static bool
30738 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30739 {
30740 if (TREE_CODE (args) == TREE_LIST)
30741 {
30742 bool ret = true;
30743
30744 for (; args; args = TREE_CHAIN (args))
30745 if (TREE_VALUE (args)
30746 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30747 ret = false;
30748 return ret;
30749 }
30750
30751 else if (TREE_CODE (args) != STRING_CST)
30752 {
30753 error ("attribute %<target%> argument not a string");
30754 return false;
30755 }
30756
30757 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30758 char *q;
30759
30760 while ((q = strtok (argstr, ",")) != NULL)
30761 {
30762 while (ISSPACE (*q)) ++q;
30763
30764 argstr = NULL;
30765 if (!strncmp (q, "thumb", 5))
30766 opts->x_target_flags |= MASK_THUMB;
30767
30768 else if (!strncmp (q, "arm", 3))
30769 opts->x_target_flags &= ~MASK_THUMB;
30770
30771 else if (!strncmp (q, "fpu=", 4))
30772 {
30773 int fpu_index;
30774 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30775 &fpu_index, CL_TARGET))
30776 {
30777 error ("invalid fpu for target attribute or pragma %qs", q);
30778 return false;
30779 }
30780 if (fpu_index == TARGET_FPU_auto)
30781 {
30782 /* This doesn't really make sense until we support
30783 general dynamic selection of the architecture and all
30784 sub-features. */
30785 sorry ("auto fpu selection not currently permitted here");
30786 return false;
30787 }
30788 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30789 }
30790 else if (!strncmp (q, "arch=", 5))
30791 {
30792 char* arch = q+5;
30793 const arch_option *arm_selected_arch
30794 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30795
30796 if (!arm_selected_arch)
30797 {
30798 error ("invalid architecture for target attribute or pragma %qs",
30799 q);
30800 return false;
30801 }
30802
30803 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30804 }
30805 else if (q[0] == '+')
30806 {
30807 opts->x_arm_arch_string
30808 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30809 }
30810 else
30811 {
30812 error ("unknown target attribute or pragma %qs", q);
30813 return false;
30814 }
30815 }
30816
30817 return true;
30818 }
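/* The strings handled above come from source-level annotations such as
     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arch=armv7-a,fpu=neon")))
   or from #pragma GCC target; each comma-separated token is processed by
   one iteration of the strtok loop.  */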
30819
30820 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30821
30822 tree
30823 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30824 struct gcc_options *opts_set)
30825 {
30826 struct cl_target_option cl_opts;
30827
30828 if (!arm_valid_target_attribute_rec (args, opts))
30829 return NULL_TREE;
30830
30831 cl_target_option_save (&cl_opts, opts);
30832 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30833 arm_option_check_internal (opts);
30834 /* Do any overrides, such as global options arch=xxx.
30835 We do this since arm_active_target was overridden. */
30836 arm_option_reconfigure_globals ();
30837 arm_options_perform_arch_sanity_checks ();
30838 arm_option_override_internal (opts, opts_set);
30839
30840 return build_target_option_node (opts);
30841 }
30842
30843 static void
30844 add_attribute (const char * mode, tree *attributes)
30845 {
30846 size_t len = strlen (mode);
30847 tree value = build_string (len, mode);
30848
30849 TREE_TYPE (value) = build_array_type (char_type_node,
30850 build_index_type (size_int (len)));
30851
30852 *attributes = tree_cons (get_identifier ("target"),
30853 build_tree_list (NULL_TREE, value),
30854 *attributes);
30855 }
30856
30857 /* For testing.  Insert thumb or arm modes alternately on functions. */
30858
30859 static void
30860 arm_insert_attributes (tree fndecl, tree * attributes)
30861 {
30862 const char *mode;
30863
30864 if (! TARGET_FLIP_THUMB)
30865 return;
30866
30867 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30868 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30869 return;
30870
30871 /* Nested definitions must inherit mode. */
30872 if (current_function_decl)
30873 {
30874 mode = TARGET_THUMB ? "thumb" : "arm";
30875 add_attribute (mode, attributes);
30876 return;
30877 }
30878
30879 /* If there is already a setting don't change it. */
30880 if (lookup_attribute ("target", *attributes) != NULL)
30881 return;
30882
30883 mode = thumb_flipper ? "thumb" : "arm";
30884 add_attribute (mode, attributes);
30885
30886 thumb_flipper = !thumb_flipper;
30887 }
30888
30889 /* Hook to validate attribute((target("string"))). */
30890
30891 static bool
30892 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30893 tree args, int ARG_UNUSED (flags))
30894 {
30895 bool ret = true;
30896 struct gcc_options func_options;
30897 tree cur_tree, new_optimize;
30898 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30899
30900 /* Get the optimization options of the current function. */
30901 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30902
30903 /* If the function changed the optimization levels as well as setting target
30904 options, start with the optimizations specified. */
30905 if (!func_optimize)
30906 func_optimize = optimization_default_node;
30907
30908 /* Init func_options. */
30909 memset (&func_options, 0, sizeof (func_options));
30910 init_options_struct (&func_options, NULL);
30911 lang_hooks.init_options_struct (&func_options);
30912
30913 /* Initialize func_options to the defaults. */
30914 cl_optimization_restore (&func_options,
30915 TREE_OPTIMIZATION (func_optimize));
30916
30917 cl_target_option_restore (&func_options,
30918 TREE_TARGET_OPTION (target_option_default_node));
30919
30920 /* Set func_options flags with new target mode. */
30921 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30922 &global_options_set);
30923
30924 if (cur_tree == NULL_TREE)
30925 ret = false;
30926
30927 new_optimize = build_optimization_node (&func_options);
30928
30929 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30930
30931 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30932
30933 finalize_options_struct (&func_options);
30934
30935 return ret;
30936 }
30937
30938 /* Match an ISA feature bitmap to a named FPU. We always use the
30939 first entry that exactly matches the feature set, so that we
30940 effectively canonicalize the FPU name for the assembler. */
30941 static const char*
30942 arm_identify_fpu_from_isa (sbitmap isa)
30943 {
30944 auto_sbitmap fpubits (isa_num_bits);
30945 auto_sbitmap cand_fpubits (isa_num_bits);
30946
30947 bitmap_and (fpubits, isa, isa_all_fpubits);
30948
30949 /* If there are no ISA feature bits relating to the FPU, we must be
30950 doing soft-float. */
30951 if (bitmap_empty_p (fpubits))
30952 return "softvfp";
30953
30954 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30955 {
30956 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30957 if (bitmap_equal_p (fpubits, cand_fpubits))
30958 return all_fpus[i].name;
30959 }
30960 /* We must find an entry, or things have gone wrong. */
30961 gcc_unreachable ();
30962 }
30963
30964 /* The last .arch and .fpu assembly strings that we printed. */
30965 static std::string arm_last_printed_arch_string;
30966 static std::string arm_last_printed_fpu_string;
30967
30968 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30969 by the function DECL. */
30970 void
30971 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30972 {
30973 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30974
30975 struct cl_target_option *targ_options;
30976 if (target_parts)
30977 targ_options = TREE_TARGET_OPTION (target_parts);
30978 else
30979 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30980 gcc_assert (targ_options);
30981
30982 /* Only update the assembler .arch string if it is distinct from the last
30983 such string we printed. */
30984 std::string arch_to_print = targ_options->x_arm_arch_string;
30985 if (arch_to_print != arm_last_printed_arch_string)
30986 {
30987 std::string arch_name
30988 = arch_to_print.substr (0, arch_to_print.find ("+"));
30989 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30990 const arch_option *arch
30991 = arm_parse_arch_option_name (all_architectures, "-march",
30992 targ_options->x_arm_arch_string);
30993 auto_sbitmap opt_bits (isa_num_bits);
30994
30995 gcc_assert (arch);
30996 if (arch->common.extensions)
30997 {
30998 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30999 opt->name != NULL;
31000 opt++)
31001 {
31002 if (!opt->remove)
31003 {
31004 arm_initialize_isa (opt_bits, opt->isa_bits);
31005 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31006 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31007 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31008 opt->name);
31009 }
31010 }
31011 }
31012
31013 arm_last_printed_arch_string = arch_to_print;
31014 }
31015
31016 fprintf (stream, "\t.syntax unified\n");
31017
31018 if (TARGET_THUMB)
31019 {
31020 if (is_called_in_ARM_mode (decl)
31021 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31022 && cfun->is_thunk))
31023 fprintf (stream, "\t.code 32\n");
31024 else if (TARGET_THUMB1)
31025 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31026 else
31027 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31028 }
31029 else
31030 fprintf (stream, "\t.arm\n");
31031
31032 std::string fpu_to_print
31033 = TARGET_SOFT_FLOAT
31034 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31035
31036 if (fpu_to_print != arm_last_printed_fpu_string)
31037 {
31038 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31039 arm_last_printed_fpu_string = fpu_to_print;
31040 }
31041
31042 if (TARGET_POKE_FUNCTION_NAME)
31043 arm_poke_function_name (stream, (const char *) name);
31044 }
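/* For a Thumb-2 function compiled with, say, -march=armv7-a -mfpu=neon,
   the directives emitted here resemble:
       .arch armv7-a
       .syntax unified
       .thumb
       .thumb_func
       .fpu neon
   (The exact output depends on the active target and on what was already
   printed for the previous function.)  */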
31045
31046 /* If MEM is in the form of [base+offset], extract the two parts
31047 of the address and store them in BASE and OFFSET; otherwise return false
31048 after clearing BASE and OFFSET. */
31049
31050 static bool
31051 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31052 {
31053 rtx addr;
31054
31055 gcc_assert (MEM_P (mem));
31056
31057 addr = XEXP (mem, 0);
31058
31059 /* Strip off const from addresses like (const (addr)). */
31060 if (GET_CODE (addr) == CONST)
31061 addr = XEXP (addr, 0);
31062
31063 if (GET_CODE (addr) == REG)
31064 {
31065 *base = addr;
31066 *offset = const0_rtx;
31067 return true;
31068 }
31069
31070 if (GET_CODE (addr) == PLUS
31071 && GET_CODE (XEXP (addr, 0)) == REG
31072 && CONST_INT_P (XEXP (addr, 1)))
31073 {
31074 *base = XEXP (addr, 0);
31075 *offset = XEXP (addr, 1);
31076 return true;
31077 }
31078
31079 *base = NULL_RTX;
31080 *offset = NULL_RTX;
31081
31082 return false;
31083 }
31084
31085 /* If INSN is a load or store whose address has the form [base+offset],
31086 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31087 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31088 otherwise return FALSE. */
31089
31090 static bool
31091 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31092 {
31093 rtx x, dest, src;
31094
31095 gcc_assert (INSN_P (insn));
31096 x = PATTERN (insn);
31097 if (GET_CODE (x) != SET)
31098 return false;
31099
31100 src = SET_SRC (x);
31101 dest = SET_DEST (x);
31102 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31103 {
31104 *is_load = false;
31105 extract_base_offset_in_addr (dest, base, offset);
31106 }
31107 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31108 {
31109 *is_load = true;
31110 extract_base_offset_in_addr (src, base, offset);
31111 }
31112 else
31113 return false;
31114
31115 return (*base != NULL_RTX && *offset != NULL_RTX);
31116 }
31117
31118 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31119
31120 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31121 and PRI are only calculated for these instructions.  For other instructions,
31122 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
31123 instruction fusion can be supported by returning different priorities.
31124
31125 It's important that irrelevant instructions get the largest FUSION_PRI. */
31126
31127 static void
31128 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31129 int *fusion_pri, int *pri)
31130 {
31131 int tmp, off_val;
31132 bool is_load;
31133 rtx base, offset;
31134
31135 gcc_assert (INSN_P (insn));
31136
31137 tmp = max_pri - 1;
31138 if (!fusion_load_store (insn, &base, &offset, &is_load))
31139 {
31140 *pri = tmp;
31141 *fusion_pri = tmp;
31142 return;
31143 }
31144
31145 /* Load goes first. */
31146 if (is_load)
31147 *fusion_pri = tmp - 1;
31148 else
31149 *fusion_pri = tmp - 2;
31150
31151 tmp /= 2;
31152
31153 /* INSN with smaller base register goes first. */
31154 tmp -= ((REGNO (base) & 0xff) << 20);
31155
31156 /* INSN with smaller offset goes first. */
31157 off_val = (int)(INTVAL (offset));
31158 if (off_val >= 0)
31159 tmp -= (off_val & 0xfffff);
31160 else
31161 tmp += ((- off_val) & 0xfffff);
31162
31163 *pri = tmp;
31164 return;
31165 }
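/* Worked example: for a fusible load from [rN, #8] the code above yields
   *fusion_pri = max_pri - 2 and
   *pri = (max_pri - 1) / 2 - ((N & 0xff) << 20) - 8,
   so loads go before stores, and smaller base register numbers and
   smaller offsets come earlier within each group.  */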
31166
31167
31168 /* Construct and return a PARALLEL RTX vector with elements numbering the
31169 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31170 the vector - from the perspective of the architecture. This does not
31171 line up with GCC's perspective on lane numbers, so we end up with
31172 different masks depending on our target endian-ness. The diagram
31173 below may help. We must draw the distinction when building masks
31174 which select one half of the vector. An instruction selecting
31175 architectural low-lanes for a big-endian target, must be described using
31176 a mask selecting GCC high-lanes.
31177
31178 Big-Endian Little-Endian
31179
31180 GCC 0 1 2 3 3 2 1 0
31181 | x | x | x | x | | x | x | x | x |
31182 Architecture 3 2 1 0 3 2 1 0
31183
31184 Low Mask: { 2, 3 } { 0, 1 }
31185 High Mask: { 0, 1 } { 2, 3 }
31186 */
31187
31188 rtx
31189 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31190 {
31191 int nunits = GET_MODE_NUNITS (mode);
31192 rtvec v = rtvec_alloc (nunits / 2);
31193 int high_base = nunits / 2;
31194 int low_base = 0;
31195 int base;
31196 rtx t1;
31197 int i;
31198
31199 if (BYTES_BIG_ENDIAN)
31200 base = high ? low_base : high_base;
31201 else
31202 base = high ? high_base : low_base;
31203
31204 for (i = 0; i < nunits / 2; i++)
31205 RTVEC_ELT (v, i) = GEN_INT (base + i);
31206
31207 t1 = gen_rtx_PARALLEL (mode, v);
31208 return t1;
31209 }
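/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above.  */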
31210
31211 /* Check OP for validity as a PARALLEL RTX vector with elements
31212 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31213 from the perspective of the architecture. See the diagram above
31214 arm_simd_vect_par_cnst_half for more details. */
31215
31216 bool
31217 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31218 bool high)
31219 {
31220 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31221 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31222 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31223 int i = 0;
31224
31225 if (!VECTOR_MODE_P (mode))
31226 return false;
31227
31228 if (count_op != count_ideal)
31229 return false;
31230
31231 for (i = 0; i < count_ideal; i++)
31232 {
31233 rtx elt_op = XVECEXP (op, 0, i);
31234 rtx elt_ideal = XVECEXP (ideal, 0, i);
31235
31236 if (!CONST_INT_P (elt_op)
31237 || INTVAL (elt_ideal) != INTVAL (elt_op))
31238 return false;
31239 }
31240 return true;
31241 }
31242
31243 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31244 in Thumb1. */
31245 static bool
31246 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31247 const_tree)
31248 {
31249 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31250 if (vcall_offset && TARGET_THUMB1)
31251 return false;
31252
31253 /* Otherwise ok. */
31254 return true;
31255 }
31256
31257 /* Generate RTL for a conditional branch with rtx comparison CODE in
31258 mode CC_MODE. The destination of the unlikely conditional branch
31259 is LABEL_REF. */
31260
31261 void
31262 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31263 rtx label_ref)
31264 {
31265 rtx x;
31266 x = gen_rtx_fmt_ee (code, VOIDmode,
31267 gen_rtx_REG (cc_mode, CC_REGNUM),
31268 const0_rtx);
31269
31270 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31271 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31272 pc_rtx);
31273 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31274 }
31275
31276 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31277
31278 For pure-code sections there is no letter code for this attribute, so
31279 output all the section flags numerically when this is needed. */
31280
31281 static bool
31282 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31283 {
31284
31285 if (flags & SECTION_ARM_PURECODE)
31286 {
31287 *num = 0x20000000;
31288
31289 if (!(flags & SECTION_DEBUG))
31290 *num |= 0x2;
31291 if (flags & SECTION_EXCLUDE)
31292 *num |= 0x80000000;
31293 if (flags & SECTION_WRITE)
31294 *num |= 0x1;
31295 if (flags & SECTION_CODE)
31296 *num |= 0x4;
31297 if (flags & SECTION_MERGE)
31298 *num |= 0x10;
31299 if (flags & SECTION_STRINGS)
31300 *num |= 0x20;
31301 if (flags & SECTION_TLS)
31302 *num |= 0x400;
31303 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31304 *num |= 0x200;
31305
31306 return true;
31307 }
31308
31309 return false;
31310 }
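/* For instance, an allocated, executable pure-code text section (no
   write, debug, TLS or group flags) gets
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together
   with SHF_ALLOC and SHF_EXECINSTR.  */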
31311
31312 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31313
31314 If pure-code is passed as an option, make sure all functions are in
31315 sections that have the SHF_ARM_PURECODE attribute. */
31316
31317 static section *
31318 arm_function_section (tree decl, enum node_frequency freq,
31319 bool startup, bool exit)
31320 {
31321 const char * section_name;
31322 section * sec;
31323
31324 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31325 return default_function_section (decl, freq, startup, exit);
31326
31327 if (!target_pure_code)
31328 return default_function_section (decl, freq, startup, exit);
31329
31330
31331 section_name = DECL_SECTION_NAME (decl);
31332
31333 /* If a function is not in a named section then it falls under the 'default'
31334 text section, also known as '.text'. We can preserve previous behavior as
31335 the default text section already has the SHF_ARM_PURECODE section
31336 attribute. */
31337 if (!section_name)
31338 {
31339 section *default_sec = default_function_section (decl, freq, startup,
31340 exit);
31341
31342 /* If default_sec is not null, then it must be a special section like for
31343 example .text.startup. We set the pure-code attribute and return the
31344 same section to preserve existing behavior. */
31345 if (default_sec)
31346 default_sec->common.flags |= SECTION_ARM_PURECODE;
31347 return default_sec;
31348 }
31349
31350 /* Otherwise look whether a section has already been created with
31351 'section_name'. */
31352 sec = get_named_section (decl, section_name, 0);
31353 if (!sec)
31354 /* If that is not the case passing NULL as the section's name to
31355 'get_named_section' will create a section with the declaration's
31356 section name. */
31357 sec = get_named_section (decl, NULL, 0);
31358
31359 /* Set the SHF_ARM_PURECODE attribute. */
31360 sec->common.flags |= SECTION_ARM_PURECODE;
31361
31362 return sec;
31363 }
31364
31365 /* Implements the TARGET_SECTION_FLAGS hook.
31366
31367 If DECL is a function declaration and pure-code is passed as an option
31368 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31369 section's name and RELOC indicates whether the declaration's initializer may
31370 contain runtime relocations. */
31371
31372 static unsigned int
31373 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31374 {
31375 unsigned int flags = default_section_type_flags (decl, name, reloc);
31376
31377 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31378 flags |= SECTION_ARM_PURECODE;
31379
31380 return flags;
31381 }
31382
31383 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31384
31385 static void
31386 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31387 rtx op0, rtx op1,
31388 rtx *quot_p, rtx *rem_p)
31389 {
31390 if (mode == SImode)
31391 gcc_assert (!TARGET_IDIV);
31392
31393 scalar_int_mode libval_mode
31394 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31395
31396 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31397 libval_mode,
31398 op0, GET_MODE (op0),
31399 op1, GET_MODE (op1));
31400
31401 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31402 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31403 GET_MODE_SIZE (mode));
31404
31405 gcc_assert (quotient);
31406 gcc_assert (remainder);
31407
31408 *quot_p = quotient;
31409 *rem_p = remainder;
31410 }
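/* For SImode operands the libcall value comes back in DImode: per the
   run-time ABI the quotient and remainder are returned as a pair, and
   the two subregs above pick them out at byte offsets 0 and
   GET_MODE_SIZE (mode) respectively.  */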
31411
31412 /* This function checks for the availability of the coprocessor builtin passed
31413 in BUILTIN for the current target. Returns true if it is available and
31414 false otherwise.  If a BUILTIN is passed for which this function has not
31415 been implemented, it will abort via gcc_unreachable. */
31416
31417 bool
31418 arm_coproc_builtin_available (enum unspecv builtin)
31419 {
31420 /* None of these builtins are available in Thumb mode if the target only
31421 supports Thumb-1. */
31422 if (TARGET_THUMB1)
31423 return false;
31424
31425 switch (builtin)
31426 {
31427 case VUNSPEC_CDP:
31428 case VUNSPEC_LDC:
31429 case VUNSPEC_LDCL:
31430 case VUNSPEC_STC:
31431 case VUNSPEC_STCL:
31432 case VUNSPEC_MCR:
31433 case VUNSPEC_MRC:
31434 if (arm_arch4)
31435 return true;
31436 break;
31437 case VUNSPEC_CDP2:
31438 case VUNSPEC_LDC2:
31439 case VUNSPEC_LDC2L:
31440 case VUNSPEC_STC2:
31441 case VUNSPEC_STC2L:
31442 case VUNSPEC_MCR2:
31443 case VUNSPEC_MRC2:
31444 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31445 ARMv8-{A,M}. */
31446 if (arm_arch5)
31447 return true;
31448 break;
31449 case VUNSPEC_MCRR:
31450 case VUNSPEC_MRRC:
31451 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31452 ARMv8-{A,M}. */
31453 if (arm_arch6 || arm_arch5te)
31454 return true;
31455 break;
31456 case VUNSPEC_MCRR2:
31457 case VUNSPEC_MRRC2:
31458 if (arm_arch6)
31459 return true;
31460 break;
31461 default:
31462 gcc_unreachable ();
31463 }
31464 return false;
31465 }
31466
31467 /* This function returns true if OP is a valid memory operand for the ldc and
31468 stc coprocessor instructions and false otherwise. */
31469
31470 bool
31471 arm_coproc_ldc_stc_legitimate_address (rtx op)
31472 {
31473 HOST_WIDE_INT range;
31474 /* Has to be a memory operand. */
31475 if (!MEM_P (op))
31476 return false;
31477
31478 op = XEXP (op, 0);
31479
31480 /* We accept registers. */
31481 if (REG_P (op))
31482 return true;
31483
31484 switch (GET_CODE (op))
31485 {
31486 case PLUS:
31487 {
31488 /* Or registers with an offset. */
31489 if (!REG_P (XEXP (op, 0)))
31490 return false;
31491
31492 op = XEXP (op, 1);
31493
31494 /* The offset must be an immediate though. */
31495 if (!CONST_INT_P (op))
31496 return false;
31497
31498 range = INTVAL (op);
31499
31500 /* Within the range of [-1020,1020]. */
31501 if (!IN_RANGE (range, -1020, 1020))
31502 return false;
31503
31504 /* And a multiple of 4. */
31505 return (range % 4) == 0;
31506 }
31507 case PRE_INC:
31508 case POST_INC:
31509 case PRE_DEC:
31510 case POST_DEC:
31511 return REG_P (XEXP (op, 0));
31512 default:
31513 gcc_unreachable ();
31514 }
31515 return false;
31516 }
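/* Accepted addresses therefore include [rN] and [rN, #OFF] with OFF in
   the range [-1020, 1020] and a multiple of 4, as well as pre/post
   increment or decrement of a base register.  */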
31517
31518 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31519
31520 In VFPv1, VFP registers could only be accessed in the mode they were
31521 set, so subregs would be invalid there. However, we don't support
31522 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31523
31524 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31525 VFP registers in little-endian order. We can't describe that accurately to
31526 GCC, so avoid taking subregs of such values.
31527
31528 The only exception is going from a 128-bit to a 64-bit type. In that
31529 case the data layout happens to be consistent for big-endian, so we
31530 explicitly allow that case. */
31531
31532 static bool
31533 arm_can_change_mode_class (machine_mode from, machine_mode to,
31534 reg_class_t rclass)
31535 {
31536 if (TARGET_BIG_END
31537 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31538 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31539 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31540 && reg_classes_intersect_p (VFP_REGS, rclass))
31541 return false;
31542 return true;
31543 }
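/* For example, on a big-endian target taking an SImode subreg of a
   DFmode value that may live in a VFP register is rejected, while the
   128-bit to 64-bit case (e.g. V2DImode to DImode) is still allowed.  */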
31544
31545 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31546 strcpy from constants will be faster. */
31547
31548 static HOST_WIDE_INT
31549 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31550 {
31551 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31552 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31553 return MAX (align, BITS_PER_WORD * factor);
31554 return align;
31555 }
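/* For example, when not optimizing for size a string constant is given
   at least 32-bit alignment, or 64-bit alignment when tuning for XScale
   in ARM mode, so that word loads can be used to copy it.  */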
31556
31557 #if CHECKING_P
31558 namespace selftest {
31559
31560 /* Scan the static data tables generated by parsecpu.awk looking for
31561 potential issues with the data. We primarily check for
31562 inconsistencies in the option extensions at present (extensions
31563 that duplicate others but aren't marked as aliases). Furthermore,
31564 for correct canonicalization, later options must never be a subset
31565 of an earlier option. Any extension should also only specify other
31566 feature bits and never an architecture bit. The architecture is inferred
31567 from the declaration of the extension. */
31568 static void
31569 arm_test_cpu_arch_data (void)
31570 {
31571 const arch_option *arch;
31572 const cpu_option *cpu;
31573 auto_sbitmap target_isa (isa_num_bits);
31574 auto_sbitmap isa1 (isa_num_bits);
31575 auto_sbitmap isa2 (isa_num_bits);
31576
31577 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31578 {
31579 const cpu_arch_extension *ext1, *ext2;
31580
31581 if (arch->common.extensions == NULL)
31582 continue;
31583
31584 arm_initialize_isa (target_isa, arch->common.isa_bits);
31585
31586 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31587 {
31588 if (ext1->alias)
31589 continue;
31590
31591 arm_initialize_isa (isa1, ext1->isa_bits);
31592 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31593 {
31594 if (ext2->alias || ext1->remove != ext2->remove)
31595 continue;
31596
31597 arm_initialize_isa (isa2, ext2->isa_bits);
31598 /* If the option is a subset of the parent option, it doesn't
31599 add anything and so isn't useful. */
31600 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31601
31602 /* If the extension specifies any architectural bits then
31603 disallow it. Extensions should only specify feature bits. */
31604 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31605 }
31606 }
31607 }
31608
31609 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31610 {
31611 const cpu_arch_extension *ext1, *ext2;
31612
31613 if (cpu->common.extensions == NULL)
31614 continue;
31615
31616 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31617
31618 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31619 {
31620 if (ext1->alias)
31621 continue;
31622
31623 arm_initialize_isa (isa1, ext1->isa_bits);
31624 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31625 {
31626 if (ext2->alias || ext1->remove != ext2->remove)
31627 continue;
31628
31629 arm_initialize_isa (isa2, ext2->isa_bits);
31630 /* If the option is a subset of the parent option, it doesn't
31631 add anything and so isn't useful. */
31632 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31633
31634 /* If the extension specifies any architectural bits then
31635 disallow it. Extensions should only specify feature bits. */
31636 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31637 }
31638 }
31639 }
31640 }
31641
31642 /* Scan the static data tables generated by parsecpu.awk looking for
31643 potential issues with the data. Here we check for consistency between the
31644 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31645 a feature bit that is not defined by any FPU flag. */
31646 static void
31647 arm_test_fpu_data (void)
31648 {
31649 auto_sbitmap isa_all_fpubits (isa_num_bits);
31650 auto_sbitmap fpubits (isa_num_bits);
31651 auto_sbitmap tmpset (isa_num_bits);
31652
31653 static const enum isa_feature fpu_bitlist[]
31654 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31655 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31656
31657 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31658 {
31659 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31660 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31661 bitmap_clear (isa_all_fpubits);
31662 bitmap_copy (isa_all_fpubits, tmpset);
31663 }
31664
31665 if (!bitmap_empty_p (isa_all_fpubits))
31666 {
31667 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31668 " group that are not defined by any FPU.\n"
31669 " Check your arm-cpus.in.\n");
31670 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31671 }
31672 }
31673
31674 static void
31675 arm_run_selftests (void)
31676 {
31677 arm_test_cpu_arch_data ();
31678 arm_test_fpu_data ();
31679 }
31680 } /* Namespace selftest. */
31681
31682 #undef TARGET_RUN_TARGET_SELFTESTS
31683 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31684 #endif /* CHECKING_P */
31685
31686 struct gcc_target targetm = TARGET_INITIALIZER;
31687
31688 #include "gt-arm.h"