1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
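/* A set of four integers, used by optimal_immediate_sequence to return
   the best sequence of immediates for synthesizing a constant.  */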
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
291 const unsigned char *sel);
292
293 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
303
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
310 const_tree);
311 static section *arm_function_section (tree, enum node_frequency, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
313 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
314 int reloc);
315 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
316 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 \f
320 /* Table of machine attributes. */
321 static const struct attribute_spec arm_attribute_table[] =
322 {
323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
324 affects_type_identity } */
325 /* Function calls made to this symbol must be done indirectly, because
326 it may lie outside of the 26 bit addressing range of a normal function
327 call. */
328 { "long_call", 0, 0, false, true, true, NULL, false },
329 /* Whereas these functions are always known to reside within the 26 bit
330 addressing range. */
331 { "short_call", 0, 0, false, true, true, NULL, false },
332 /* Specify the procedure call conventions for a function. */
333 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
334 false },
335 /* Interrupt Service Routines have special prologue and epilogue requirements. */
336 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
339 false },
340 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
341 false },
342 #ifdef ARM_PE
343 /* ARM/PE has three new attributes:
344 interfacearm - ?
345 dllexport - for exporting a function/variable that will live in a dll
346 dllimport - for importing a function/variable from a dll
347
348 Microsoft allows multiple declspecs in one __declspec, separating
349 them with spaces. We do NOT support this. Instead, use __declspec
350 multiple times.
351 */
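/* For example, instead of `__declspec(dllexport naked)' write
   `__declspec(dllexport) __declspec(naked)'.  */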
352 { "dllimport", 0, 0, true, false, false, NULL, false },
353 { "dllexport", 0, 0, true, false, false, NULL, false },
354 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
355 false },
356 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
360 false },
361 #endif
362 /* ARMv8-M Security Extensions support. */
363 { "cmse_nonsecure_entry", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_entry, false },
365 { "cmse_nonsecure_call", 0, 0, true, false, false,
366 arm_handle_cmse_nonsecure_call, true },
367 { NULL, 0, 0, false, false, false, NULL, false }
368 };
369 \f
370 /* Initialize the GCC target structure. */
371 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 #undef TARGET_MERGE_DECL_ATTRIBUTES
373 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
374 #endif
375
376 #undef TARGET_LEGITIMIZE_ADDRESS
377 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
378
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
381
382 #undef TARGET_INSERT_ATTRIBUTES
383 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
384
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START arm_file_start
387 #undef TARGET_ASM_FILE_END
388 #define TARGET_ASM_FILE_END arm_file_end
389
390 #undef TARGET_ASM_ALIGNED_SI_OP
391 #define TARGET_ASM_ALIGNED_SI_OP NULL
392 #undef TARGET_ASM_INTEGER
393 #define TARGET_ASM_INTEGER arm_assemble_integer
394
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND arm_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
401
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
404
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
407
408 #undef TARGET_ASM_FUNCTION_EPILOGUE
409 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
410
411 #undef TARGET_CAN_INLINE_P
412 #define TARGET_CAN_INLINE_P arm_can_inline_p
413
414 #undef TARGET_RELAYOUT_FUNCTION
415 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
416
417 #undef TARGET_OPTION_OVERRIDE
418 #define TARGET_OPTION_OVERRIDE arm_option_override
419
420 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
421 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
422
423 #undef TARGET_OPTION_SAVE
424 #define TARGET_OPTION_SAVE arm_option_save
425
426 #undef TARGET_OPTION_RESTORE
427 #define TARGET_OPTION_RESTORE arm_option_restore
428
429 #undef TARGET_OPTION_PRINT
430 #define TARGET_OPTION_PRINT arm_option_print
431
432 #undef TARGET_COMP_TYPE_ATTRIBUTES
433 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
434
435 #undef TARGET_SCHED_CAN_SPECULATE_INSN
436 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
437
438 #undef TARGET_SCHED_MACRO_FUSION_P
439 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
440
441 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
442 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
443
444 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
445 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
446
447 #undef TARGET_SCHED_ADJUST_COST
448 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
449
450 #undef TARGET_SET_CURRENT_FUNCTION
451 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
452
453 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
454 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
455
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER arm_sched_reorder
458
459 #undef TARGET_REGISTER_MOVE_COST
460 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
461
462 #undef TARGET_MEMORY_MOVE_COST
463 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
464
465 #undef TARGET_ENCODE_SECTION_INFO
466 #ifdef ARM_PE
467 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
468 #else
469 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
470 #endif
471
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
474
475 #undef TARGET_ASM_INTERNAL_LABEL
476 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
477
478 #undef TARGET_FLOATN_MODE
479 #define TARGET_FLOATN_MODE arm_floatn_mode
480
481 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
482 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
483
484 #undef TARGET_FUNCTION_VALUE
485 #define TARGET_FUNCTION_VALUE arm_function_value
486
487 #undef TARGET_LIBCALL_VALUE
488 #define TARGET_LIBCALL_VALUE arm_libcall_value
489
490 #undef TARGET_FUNCTION_VALUE_REGNO_P
491 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
492
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
497
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS arm_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST arm_address_cost
502
503 #undef TARGET_SHIFT_TRUNCATION_MASK
504 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
507 #undef TARGET_ARRAY_MODE_SUPPORTED_P
508 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
509 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
510 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
511 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
512 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
513 arm_autovectorize_vector_sizes
514
515 #undef TARGET_MACHINE_DEPENDENT_REORG
516 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
517
518 #undef TARGET_INIT_BUILTINS
519 #define TARGET_INIT_BUILTINS arm_init_builtins
520 #undef TARGET_EXPAND_BUILTIN
521 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
522 #undef TARGET_BUILTIN_DECL
523 #define TARGET_BUILTIN_DECL arm_builtin_decl
524
525 #undef TARGET_INIT_LIBFUNCS
526 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
527
528 #undef TARGET_PROMOTE_FUNCTION_MODE
529 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
530 #undef TARGET_PROMOTE_PROTOTYPES
531 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
534 #undef TARGET_ARG_PARTIAL_BYTES
535 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
536 #undef TARGET_FUNCTION_ARG
537 #define TARGET_FUNCTION_ARG arm_function_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_PADDING
541 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
542 #undef TARGET_FUNCTION_ARG_BOUNDARY
543 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
544
545 #undef TARGET_SETUP_INCOMING_VARARGS
546 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
547
548 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
549 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
550
551 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
552 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
553 #undef TARGET_TRAMPOLINE_INIT
554 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
555 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
556 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
557
558 #undef TARGET_WARN_FUNC_RETURN
559 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
560
561 #undef TARGET_DEFAULT_SHORT_ENUMS
562 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
563
564 #undef TARGET_ALIGN_ANON_BITFIELD
565 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
566
567 #undef TARGET_NARROW_VOLATILE_BITFIELD
568 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
569
570 #undef TARGET_CXX_GUARD_TYPE
571 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
572
573 #undef TARGET_CXX_GUARD_MASK_BIT
574 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
575
576 #undef TARGET_CXX_GET_COOKIE_SIZE
577 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
578
579 #undef TARGET_CXX_COOKIE_HAS_SIZE
580 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
581
582 #undef TARGET_CXX_CDTOR_RETURNS_THIS
583 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
584
585 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
586 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
587
588 #undef TARGET_CXX_USE_AEABI_ATEXIT
589 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
590
591 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
592 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
593 arm_cxx_determine_class_data_visibility
594
595 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
596 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
597
598 #undef TARGET_RETURN_IN_MSB
599 #define TARGET_RETURN_IN_MSB arm_return_in_msb
600
601 #undef TARGET_RETURN_IN_MEMORY
602 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
603
604 #undef TARGET_MUST_PASS_IN_STACK
605 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
606
607 #if ARM_UNWIND_INFO
608 #undef TARGET_ASM_UNWIND_EMIT
609 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
610
611 /* EABI unwinding tables use a different format for the typeinfo tables. */
612 #undef TARGET_ASM_TTYPE
613 #define TARGET_ASM_TTYPE arm_output_ttype
614
615 #undef TARGET_ARM_EABI_UNWINDER
616 #define TARGET_ARM_EABI_UNWINDER true
617
618 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
619 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
620
621 #endif /* ARM_UNWIND_INFO */
622
623 #undef TARGET_ASM_INIT_SECTIONS
624 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
625
626 #undef TARGET_DWARF_REGISTER_SPAN
627 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
628
629 #undef TARGET_CANNOT_COPY_INSN_P
630 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
631
632 #ifdef HAVE_AS_TLS
633 #undef TARGET_HAVE_TLS
634 #define TARGET_HAVE_TLS true
635 #endif
636
637 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
638 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
639
640 #undef TARGET_LEGITIMATE_CONSTANT_P
641 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
642
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
645
646 #undef TARGET_MAX_ANCHOR_OFFSET
647 #define TARGET_MAX_ANCHOR_OFFSET 4095
648
649 /* The minimum is set such that the total size of the block
650 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
651 divisible by eight, ensuring natural spacing of anchors. */
652 #undef TARGET_MIN_ANCHOR_OFFSET
653 #define TARGET_MIN_ANCHOR_OFFSET -4088
654
655 #undef TARGET_SCHED_ISSUE_RATE
656 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
657
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
660 arm_first_cycle_multipass_dfa_lookahead
661
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
664 arm_first_cycle_multipass_dfa_lookahead_guard
665
666 #undef TARGET_MANGLE_TYPE
667 #define TARGET_MANGLE_TYPE arm_mangle_type
668
669 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
670 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
671
672 #undef TARGET_BUILD_BUILTIN_VA_LIST
673 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
674 #undef TARGET_EXPAND_BUILTIN_VA_START
675 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
676 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
677 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
678
679 #ifdef HAVE_AS_TLS
680 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
681 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
682 #endif
683
684 #undef TARGET_LEGITIMATE_ADDRESS_P
685 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
686
687 #undef TARGET_PREFERRED_RELOAD_CLASS
688 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
689
690 #undef TARGET_PROMOTED_TYPE
691 #define TARGET_PROMOTED_TYPE arm_promoted_type
692
693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
694 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
695
696 #undef TARGET_COMPUTE_FRAME_LAYOUT
697 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
698
699 #undef TARGET_FRAME_POINTER_REQUIRED
700 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
701
702 #undef TARGET_CAN_ELIMINATE
703 #define TARGET_CAN_ELIMINATE arm_can_eliminate
704
705 #undef TARGET_CONDITIONAL_REGISTER_USAGE
706 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
707
708 #undef TARGET_CLASS_LIKELY_SPILLED_P
709 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
710
711 #undef TARGET_VECTORIZE_BUILTINS
712 #define TARGET_VECTORIZE_BUILTINS
713
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
716 arm_builtin_vectorized_function
717
718 #undef TARGET_VECTOR_ALIGNMENT
719 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
720
721 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
722 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
723 arm_vector_alignment_reachable
724
725 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
726 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
727 arm_builtin_support_vector_misalignment
728
729 #undef TARGET_PREFERRED_RENAME_CLASS
730 #define TARGET_PREFERRED_RENAME_CLASS \
731 arm_preferred_rename_class
732
733 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
734 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
735 arm_vectorize_vec_perm_const_ok
736
737 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
738 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
739 arm_builtin_vectorization_cost
740 #undef TARGET_VECTORIZE_ADD_STMT_COST
741 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
742
743 #undef TARGET_CANONICALIZE_COMPARISON
744 #define TARGET_CANONICALIZE_COMPARISON \
745 arm_canonicalize_comparison
746
747 #undef TARGET_ASAN_SHADOW_OFFSET
748 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
749
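/* The maximum number of conditional instructions that may appear in an
   IT block; -mrestrict-it limits this to a single instruction.  */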
750 #undef MAX_INSN_PER_IT_BLOCK
751 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
752
753 #undef TARGET_CAN_USE_DOLOOP_P
754 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
755
756 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
757 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
758
759 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
760 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
761
762 #undef TARGET_SCHED_FUSION_PRIORITY
763 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
764
765 #undef TARGET_ASM_FUNCTION_SECTION
766 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
767
768 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
769 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
770
771 #undef TARGET_SECTION_TYPE_FLAGS
772 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
773
774 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
775 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
776
777 #undef TARGET_C_EXCESS_PRECISION
778 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
779
780 /* Although the architecture reserves bits 0 and 1, only the former is
781 used for ARM/Thumb ISA selection in v7 and earlier versions. */
782 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
783 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
784
785 #undef TARGET_FIXED_CONDITION_CODE_REGS
786 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
787
788 #undef TARGET_HARD_REGNO_MODE_OK
789 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
790
791 #undef TARGET_MODES_TIEABLE_P
792 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
793 \f
794 /* Obstack for minipool constant handling. */
795 static struct obstack minipool_obstack;
796 static char * minipool_startobj;
797
798 /* The maximum number of insns skipped which
799 will be conditionalised if possible. */
800 static int max_insns_skipped = 5;
801
802 extern FILE * asm_out_file;
803
804 /* True if we are currently building a constant table. */
805 int making_const_table;
806
807 /* The processor for which instructions should be scheduled. */
808 enum processor_type arm_tune = TARGET_CPU_arm_none;
809
810 /* The current tuning set. */
811 const struct tune_params *current_tune;
812
813 /* Which floating point hardware to schedule for. */
814 int arm_fpu_attr;
815
816 /* Used for Thumb call_via trampolines. */
817 rtx thumb_call_via_label[14];
818 static int thumb_call_reg_needed;
819
820 /* The bits in this mask specify which instruction scheduling options should
821 be used. */
822 unsigned int tune_flags = 0;
823
824 /* The highest ARM architecture version supported by the
825 target. */
826 enum base_architecture arm_base_arch = BASE_ARCH_0;
827
828 /* Active target architecture and tuning. */
829
830 struct arm_build_target arm_active_target;
831
832 /* The following are used in the arm.md file as equivalents to bits
833 in the above two flag variables. */
834
835 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
836 int arm_arch3m = 0;
837
838 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
839 int arm_arch4 = 0;
840
841 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
842 int arm_arch4t = 0;
843
844 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
845 int arm_arch5 = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
848 int arm_arch5e = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
851 int arm_arch5te = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
854 int arm_arch6 = 0;
855
856 /* Nonzero if this chip supports the ARM 6K extensions. */
857 int arm_arch6k = 0;
858
859 /* Nonzero if this chip supports the ARM 6KZ extensions. */
860 int arm_arch6kz = 0;
861
862 /* Nonzero if instructions present in ARMv6-M can be used. */
863 int arm_arch6m = 0;
864
865 /* Nonzero if this chip supports the ARM 7 extensions. */
866 int arm_arch7 = 0;
867
868 /* Nonzero if this chip supports the Large Physical Address Extension. */
869 int arm_arch_lpae = 0;
870
871 /* Nonzero if instructions not present in the 'M' profile can be used. */
872 int arm_arch_notm = 0;
873
874 /* Nonzero if instructions present in ARMv7E-M can be used. */
875 int arm_arch7em = 0;
876
877 /* Nonzero if instructions present in ARMv8 can be used. */
878 int arm_arch8 = 0;
879
880 /* Nonzero if this chip supports the ARMv8.1 extensions. */
881 int arm_arch8_1 = 0;
882
883 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
884 int arm_arch8_2 = 0;
885
886 /* Nonzero if this chip supports the FP16 instructions extension of ARM
887 Architecture 8.2. */
888 int arm_fp16_inst = 0;
889
890 /* Nonzero if this chip can benefit from load scheduling. */
891 int arm_ld_sched = 0;
892
893 /* Nonzero if this chip is a StrongARM. */
894 int arm_tune_strongarm = 0;
895
896 /* Nonzero if this chip supports Intel Wireless MMX technology. */
897 int arm_arch_iwmmxt = 0;
898
899 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
900 int arm_arch_iwmmxt2 = 0;
901
902 /* Nonzero if this chip is an XScale. */
903 int arm_arch_xscale = 0;
904
905 /* Nonzero if tuning for XScale. */
906 int arm_tune_xscale = 0;
907
908 /* Nonzero if we want to tune for stores that access the write-buffer.
909 This typically means an ARM6 or ARM7 with MMU or MPU. */
910 int arm_tune_wbuf = 0;
911
912 /* Nonzero if tuning for Cortex-A9. */
913 int arm_tune_cortex_a9 = 0;
914
915 /* Nonzero if we should define __THUMB_INTERWORK__ in the
916 preprocessor.
917 XXX This is a bit of a hack; it's intended to help work around
918 problems in GLD, which doesn't understand that armv5t code is
919 interworking clean. */
920 int arm_cpp_interwork = 0;
921
922 /* Nonzero if chip supports Thumb 1. */
923 int arm_arch_thumb1;
924
925 /* Nonzero if chip supports Thumb 2. */
926 int arm_arch_thumb2;
927
928 /* Nonzero if chip supports integer division instruction. */
929 int arm_arch_arm_hwdiv;
930 int arm_arch_thumb_hwdiv;
931
932 /* Nonzero if chip disallows volatile memory access in IT block. */
933 int arm_arch_no_volatile_ce;
934
935 /* Nonzero if we should use Neon to handle 64-bit operations rather
936 than core registers. */
937 int prefer_neon_for_64bits = 0;
938
939 /* Nonzero if we shouldn't use literal pools. */
940 bool arm_disable_literal_pool = false;
941
942 /* The register number to be used for the PIC offset register. */
943 unsigned arm_pic_register = INVALID_REGNUM;
944
945 enum arm_pcs arm_pcs_default;
946
947 /* For an explanation of these variables, see final_prescan_insn below. */
948 int arm_ccfsm_state;
949 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
950 enum arm_cond_code arm_current_cc;
951
952 rtx arm_target_insn;
953 int arm_target_label;
954 /* The number of conditionally executed insns, including the current insn. */
955 int arm_condexec_count = 0;
956 /* A bitmask specifying the patterns for the IT block.
957 Zero means do not output an IT block before this insn. */
958 int arm_condexec_mask = 0;
959 /* The number of bits used in arm_condexec_mask. */
960 int arm_condexec_masklen = 0;
961
962 /* Nonzero if chip supports the ARMv8 CRC instructions. */
963 int arm_arch_crc = 0;
964
965 /* Nonzero if chip supports the ARMv8-M security extensions. */
966 int arm_arch_cmse = 0;
967
968 /* Nonzero if the core has a very small, high-latency, multiply unit. */
969 int arm_m_profile_small_mul = 0;
970
971 /* The condition codes of the ARM, and the inverse function. */
972 static const char * const arm_condition_codes[] =
973 {
974 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
975 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
976 };
977
978 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
979 int arm_regs_in_sequence[] =
980 {
981 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
982 };
983
984 #define ARM_LSL_NAME "lsl"
985 #define streq(string1, string2) (strcmp (string1, string2) == 0)
986
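/* The set of low registers (r0-r7) that Thumb-2 code may use as work
   registers, excluding the hard frame pointer, stack pointer, program
   counter and the PIC register.  */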
987 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
988 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
989 | (1 << PIC_OFFSET_TABLE_REGNUM)))
990 \f
991 /* Initialization code. */
992
993 struct cpu_tune
994 {
995 enum processor_type scheduler;
996 unsigned int tune_flags;
997 const struct tune_params *tune;
998 };
999
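/* Initializers for the prefetch tuning parameters: the number of
   prefetch slots and the L1 cache size and line size; -1 marks a value
   as unknown or not applicable.  */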
1000 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1001 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1002 { \
1003 num_slots, \
1004 l1_size, \
1005 l1_line_size \
1006 }
1007
1008 /* arm generic vectorizer costs. */
1009 static const
1010 struct cpu_vec_costs arm_default_vec_cost = {
1011 1, /* scalar_stmt_cost. */
1012 1, /* scalar_load_cost. */
1013 1, /* scalar_store_cost. */
1014 1, /* vec_stmt_cost. */
1015 1, /* vec_to_scalar_cost. */
1016 1, /* scalar_to_vec_cost. */
1017 1, /* vec_align_load_cost. */
1018 1, /* vec_unalign_load_cost. */
1019 1, /* vec_unalign_store_cost. */
1020 1, /* vec_store_cost. */
1021 3, /* cond_taken_branch_cost. */
1022 1, /* cond_not_taken_branch_cost. */
1023 };
1024
1025 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h. */
1026 #include "aarch-cost-tables.h"
1027
1028
1029
1030 const struct cpu_cost_table cortexa9_extra_costs =
1031 {
1032 /* ALU */
1033 {
1034 0, /* arith. */
1035 0, /* logical. */
1036 0, /* shift. */
1037 COSTS_N_INSNS (1), /* shift_reg. */
1038 COSTS_N_INSNS (1), /* arith_shift. */
1039 COSTS_N_INSNS (2), /* arith_shift_reg. */
1040 0, /* log_shift. */
1041 COSTS_N_INSNS (1), /* log_shift_reg. */
1042 COSTS_N_INSNS (1), /* extend. */
1043 COSTS_N_INSNS (2), /* extend_arith. */
1044 COSTS_N_INSNS (1), /* bfi. */
1045 COSTS_N_INSNS (1), /* bfx. */
1046 0, /* clz. */
1047 0, /* rev. */
1048 0, /* non_exec. */
1049 true /* non_exec_costs_exec. */
1050 },
1051 {
1052 /* MULT SImode */
1053 {
1054 COSTS_N_INSNS (3), /* simple. */
1055 COSTS_N_INSNS (3), /* flag_setting. */
1056 COSTS_N_INSNS (2), /* extend. */
1057 COSTS_N_INSNS (3), /* add. */
1058 COSTS_N_INSNS (2), /* extend_add. */
1059 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1060 },
1061 /* MULT DImode */
1062 {
1063 0, /* simple (N/A). */
1064 0, /* flag_setting (N/A). */
1065 COSTS_N_INSNS (4), /* extend. */
1066 0, /* add (N/A). */
1067 COSTS_N_INSNS (4), /* extend_add. */
1068 0 /* idiv (N/A). */
1069 }
1070 },
1071 /* LD/ST */
1072 {
1073 COSTS_N_INSNS (2), /* load. */
1074 COSTS_N_INSNS (2), /* load_sign_extend. */
1075 COSTS_N_INSNS (2), /* ldrd. */
1076 COSTS_N_INSNS (2), /* ldm_1st. */
1077 1, /* ldm_regs_per_insn_1st. */
1078 2, /* ldm_regs_per_insn_subsequent. */
1079 COSTS_N_INSNS (5), /* loadf. */
1080 COSTS_N_INSNS (5), /* loadd. */
1081 COSTS_N_INSNS (1), /* load_unaligned. */
1082 COSTS_N_INSNS (2), /* store. */
1083 COSTS_N_INSNS (2), /* strd. */
1084 COSTS_N_INSNS (2), /* stm_1st. */
1085 1, /* stm_regs_per_insn_1st. */
1086 2, /* stm_regs_per_insn_subsequent. */
1087 COSTS_N_INSNS (1), /* storef. */
1088 COSTS_N_INSNS (1), /* stored. */
1089 COSTS_N_INSNS (1), /* store_unaligned. */
1090 COSTS_N_INSNS (1), /* loadv. */
1091 COSTS_N_INSNS (1) /* storev. */
1092 },
1093 {
1094 /* FP SFmode */
1095 {
1096 COSTS_N_INSNS (14), /* div. */
1097 COSTS_N_INSNS (4), /* mult. */
1098 COSTS_N_INSNS (7), /* mult_addsub. */
1099 COSTS_N_INSNS (30), /* fma. */
1100 COSTS_N_INSNS (3), /* addsub. */
1101 COSTS_N_INSNS (1), /* fpconst. */
1102 COSTS_N_INSNS (1), /* neg. */
1103 COSTS_N_INSNS (3), /* compare. */
1104 COSTS_N_INSNS (3), /* widen. */
1105 COSTS_N_INSNS (3), /* narrow. */
1106 COSTS_N_INSNS (3), /* toint. */
1107 COSTS_N_INSNS (3), /* fromint. */
1108 COSTS_N_INSNS (3) /* roundint. */
1109 },
1110 /* FP DFmode */
1111 {
1112 COSTS_N_INSNS (24), /* div. */
1113 COSTS_N_INSNS (5), /* mult. */
1114 COSTS_N_INSNS (8), /* mult_addsub. */
1115 COSTS_N_INSNS (30), /* fma. */
1116 COSTS_N_INSNS (3), /* addsub. */
1117 COSTS_N_INSNS (1), /* fpconst. */
1118 COSTS_N_INSNS (1), /* neg. */
1119 COSTS_N_INSNS (3), /* compare. */
1120 COSTS_N_INSNS (3), /* widen. */
1121 COSTS_N_INSNS (3), /* narrow. */
1122 COSTS_N_INSNS (3), /* toint. */
1123 COSTS_N_INSNS (3), /* fromint. */
1124 COSTS_N_INSNS (3) /* roundint. */
1125 }
1126 },
1127 /* Vector */
1128 {
1129 COSTS_N_INSNS (1) /* alu. */
1130 }
1131 };
1132
1133 const struct cpu_cost_table cortexa8_extra_costs =
1134 {
1135 /* ALU */
1136 {
1137 0, /* arith. */
1138 0, /* logical. */
1139 COSTS_N_INSNS (1), /* shift. */
1140 0, /* shift_reg. */
1141 COSTS_N_INSNS (1), /* arith_shift. */
1142 0, /* arith_shift_reg. */
1143 COSTS_N_INSNS (1), /* log_shift. */
1144 0, /* log_shift_reg. */
1145 0, /* extend. */
1146 0, /* extend_arith. */
1147 0, /* bfi. */
1148 0, /* bfx. */
1149 0, /* clz. */
1150 0, /* rev. */
1151 0, /* non_exec. */
1152 true /* non_exec_costs_exec. */
1153 },
1154 {
1155 /* MULT SImode */
1156 {
1157 COSTS_N_INSNS (1), /* simple. */
1158 COSTS_N_INSNS (1), /* flag_setting. */
1159 COSTS_N_INSNS (1), /* extend. */
1160 COSTS_N_INSNS (1), /* add. */
1161 COSTS_N_INSNS (1), /* extend_add. */
1162 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1163 },
1164 /* MULT DImode */
1165 {
1166 0, /* simple (N/A). */
1167 0, /* flag_setting (N/A). */
1168 COSTS_N_INSNS (2), /* extend. */
1169 0, /* add (N/A). */
1170 COSTS_N_INSNS (2), /* extend_add. */
1171 0 /* idiv (N/A). */
1172 }
1173 },
1174 /* LD/ST */
1175 {
1176 COSTS_N_INSNS (1), /* load. */
1177 COSTS_N_INSNS (1), /* load_sign_extend. */
1178 COSTS_N_INSNS (1), /* ldrd. */
1179 COSTS_N_INSNS (1), /* ldm_1st. */
1180 1, /* ldm_regs_per_insn_1st. */
1181 2, /* ldm_regs_per_insn_subsequent. */
1182 COSTS_N_INSNS (1), /* loadf. */
1183 COSTS_N_INSNS (1), /* loadd. */
1184 COSTS_N_INSNS (1), /* load_unaligned. */
1185 COSTS_N_INSNS (1), /* store. */
1186 COSTS_N_INSNS (1), /* strd. */
1187 COSTS_N_INSNS (1), /* stm_1st. */
1188 1, /* stm_regs_per_insn_1st. */
1189 2, /* stm_regs_per_insn_subsequent. */
1190 COSTS_N_INSNS (1), /* storef. */
1191 COSTS_N_INSNS (1), /* stored. */
1192 COSTS_N_INSNS (1), /* store_unaligned. */
1193 COSTS_N_INSNS (1), /* loadv. */
1194 COSTS_N_INSNS (1) /* storev. */
1195 },
1196 {
1197 /* FP SFmode */
1198 {
1199 COSTS_N_INSNS (36), /* div. */
1200 COSTS_N_INSNS (11), /* mult. */
1201 COSTS_N_INSNS (20), /* mult_addsub. */
1202 COSTS_N_INSNS (30), /* fma. */
1203 COSTS_N_INSNS (9), /* addsub. */
1204 COSTS_N_INSNS (3), /* fpconst. */
1205 COSTS_N_INSNS (3), /* neg. */
1206 COSTS_N_INSNS (6), /* compare. */
1207 COSTS_N_INSNS (4), /* widen. */
1208 COSTS_N_INSNS (4), /* narrow. */
1209 COSTS_N_INSNS (8), /* toint. */
1210 COSTS_N_INSNS (8), /* fromint. */
1211 COSTS_N_INSNS (8) /* roundint. */
1212 },
1213 /* FP DFmode */
1214 {
1215 COSTS_N_INSNS (64), /* div. */
1216 COSTS_N_INSNS (16), /* mult. */
1217 COSTS_N_INSNS (25), /* mult_addsub. */
1218 COSTS_N_INSNS (30), /* fma. */
1219 COSTS_N_INSNS (9), /* addsub. */
1220 COSTS_N_INSNS (3), /* fpconst. */
1221 COSTS_N_INSNS (3), /* neg. */
1222 COSTS_N_INSNS (6), /* compare. */
1223 COSTS_N_INSNS (6), /* widen. */
1224 COSTS_N_INSNS (6), /* narrow. */
1225 COSTS_N_INSNS (8), /* toint. */
1226 COSTS_N_INSNS (8), /* fromint. */
1227 COSTS_N_INSNS (8) /* roundint. */
1228 }
1229 },
1230 /* Vector */
1231 {
1232 COSTS_N_INSNS (1) /* alu. */
1233 }
1234 };
1235
1236 const struct cpu_cost_table cortexa5_extra_costs =
1237 {
1238 /* ALU */
1239 {
1240 0, /* arith. */
1241 0, /* logical. */
1242 COSTS_N_INSNS (1), /* shift. */
1243 COSTS_N_INSNS (1), /* shift_reg. */
1244 COSTS_N_INSNS (1), /* arith_shift. */
1245 COSTS_N_INSNS (1), /* arith_shift_reg. */
1246 COSTS_N_INSNS (1), /* log_shift. */
1247 COSTS_N_INSNS (1), /* log_shift_reg. */
1248 COSTS_N_INSNS (1), /* extend. */
1249 COSTS_N_INSNS (1), /* extend_arith. */
1250 COSTS_N_INSNS (1), /* bfi. */
1251 COSTS_N_INSNS (1), /* bfx. */
1252 COSTS_N_INSNS (1), /* clz. */
1253 COSTS_N_INSNS (1), /* rev. */
1254 0, /* non_exec. */
1255 true /* non_exec_costs_exec. */
1256 },
1257
1258 {
1259 /* MULT SImode */
1260 {
1261 0, /* simple. */
1262 COSTS_N_INSNS (1), /* flag_setting. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* add. */
1265 COSTS_N_INSNS (1), /* extend_add. */
1266 COSTS_N_INSNS (7) /* idiv. */
1267 },
1268 /* MULT DImode */
1269 {
1270 0, /* simple (N/A). */
1271 0, /* flag_setting (N/A). */
1272 COSTS_N_INSNS (1), /* extend. */
1273 0, /* add. */
1274 COSTS_N_INSNS (2), /* extend_add. */
1275 0 /* idiv (N/A). */
1276 }
1277 },
1278 /* LD/ST */
1279 {
1280 COSTS_N_INSNS (1), /* load. */
1281 COSTS_N_INSNS (1), /* load_sign_extend. */
1282 COSTS_N_INSNS (6), /* ldrd. */
1283 COSTS_N_INSNS (1), /* ldm_1st. */
1284 1, /* ldm_regs_per_insn_1st. */
1285 2, /* ldm_regs_per_insn_subsequent. */
1286 COSTS_N_INSNS (2), /* loadf. */
1287 COSTS_N_INSNS (4), /* loadd. */
1288 COSTS_N_INSNS (1), /* load_unaligned. */
1289 COSTS_N_INSNS (1), /* store. */
1290 COSTS_N_INSNS (3), /* strd. */
1291 COSTS_N_INSNS (1), /* stm_1st. */
1292 1, /* stm_regs_per_insn_1st. */
1293 2, /* stm_regs_per_insn_subsequent. */
1294 COSTS_N_INSNS (2), /* storef. */
1295 COSTS_N_INSNS (2), /* stored. */
1296 COSTS_N_INSNS (1), /* store_unaligned. */
1297 COSTS_N_INSNS (1), /* loadv. */
1298 COSTS_N_INSNS (1) /* storev. */
1299 },
1300 {
1301 /* FP SFmode */
1302 {
1303 COSTS_N_INSNS (15), /* div. */
1304 COSTS_N_INSNS (3), /* mult. */
1305 COSTS_N_INSNS (7), /* mult_addsub. */
1306 COSTS_N_INSNS (7), /* fma. */
1307 COSTS_N_INSNS (3), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (3), /* compare. */
1311 COSTS_N_INSNS (3), /* widen. */
1312 COSTS_N_INSNS (3), /* narrow. */
1313 COSTS_N_INSNS (3), /* toint. */
1314 COSTS_N_INSNS (3), /* fromint. */
1315 COSTS_N_INSNS (3) /* roundint. */
1316 },
1317 /* FP DFmode */
1318 {
1319 COSTS_N_INSNS (30), /* div. */
1320 COSTS_N_INSNS (6), /* mult. */
1321 COSTS_N_INSNS (10), /* mult_addsub. */
1322 COSTS_N_INSNS (7), /* fma. */
1323 COSTS_N_INSNS (3), /* addsub. */
1324 COSTS_N_INSNS (3), /* fpconst. */
1325 COSTS_N_INSNS (3), /* neg. */
1326 COSTS_N_INSNS (3), /* compare. */
1327 COSTS_N_INSNS (3), /* widen. */
1328 COSTS_N_INSNS (3), /* narrow. */
1329 COSTS_N_INSNS (3), /* toint. */
1330 COSTS_N_INSNS (3), /* fromint. */
1331 COSTS_N_INSNS (3) /* roundint. */
1332 }
1333 },
1334 /* Vector */
1335 {
1336 COSTS_N_INSNS (1) /* alu. */
1337 }
1338 };
1339
1340
1341 const struct cpu_cost_table cortexa7_extra_costs =
1342 {
1343 /* ALU */
1344 {
1345 0, /* arith. */
1346 0, /* logical. */
1347 COSTS_N_INSNS (1), /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 COSTS_N_INSNS (1), /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 COSTS_N_INSNS (1), /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1361 },
1362
1363 {
1364 /* MULT SImode */
1365 {
1366 0, /* simple. */
1367 COSTS_N_INSNS (1), /* flag_setting. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* add. */
1370 COSTS_N_INSNS (1), /* extend_add. */
1371 COSTS_N_INSNS (7) /* idiv. */
1372 },
1373 /* MULT DImode */
1374 {
1375 0, /* simple (N/A). */
1376 0, /* flag_setting (N/A). */
1377 COSTS_N_INSNS (1), /* extend. */
1378 0, /* add. */
1379 COSTS_N_INSNS (2), /* extend_add. */
1380 0 /* idiv (N/A). */
1381 }
1382 },
1383 /* LD/ST */
1384 {
1385 COSTS_N_INSNS (1), /* load. */
1386 COSTS_N_INSNS (1), /* load_sign_extend. */
1387 COSTS_N_INSNS (3), /* ldrd. */
1388 COSTS_N_INSNS (1), /* ldm_1st. */
1389 1, /* ldm_regs_per_insn_1st. */
1390 2, /* ldm_regs_per_insn_subsequent. */
1391 COSTS_N_INSNS (2), /* loadf. */
1392 COSTS_N_INSNS (2), /* loadd. */
1393 COSTS_N_INSNS (1), /* load_unaligned. */
1394 COSTS_N_INSNS (1), /* store. */
1395 COSTS_N_INSNS (3), /* strd. */
1396 COSTS_N_INSNS (1), /* stm_1st. */
1397 1, /* stm_regs_per_insn_1st. */
1398 2, /* stm_regs_per_insn_subsequent. */
1399 COSTS_N_INSNS (2), /* storef. */
1400 COSTS_N_INSNS (2), /* stored. */
1401 COSTS_N_INSNS (1), /* store_unaligned. */
1402 COSTS_N_INSNS (1), /* loadv. */
1403 COSTS_N_INSNS (1) /* storev. */
1404 },
1405 {
1406 /* FP SFmode */
1407 {
1408 COSTS_N_INSNS (15), /* div. */
1409 COSTS_N_INSNS (3), /* mult. */
1410 COSTS_N_INSNS (7), /* mult_addsub. */
1411 COSTS_N_INSNS (7), /* fma. */
1412 COSTS_N_INSNS (3), /* addsub. */
1413 COSTS_N_INSNS (3), /* fpconst. */
1414 COSTS_N_INSNS (3), /* neg. */
1415 COSTS_N_INSNS (3), /* compare. */
1416 COSTS_N_INSNS (3), /* widen. */
1417 COSTS_N_INSNS (3), /* narrow. */
1418 COSTS_N_INSNS (3), /* toint. */
1419 COSTS_N_INSNS (3), /* fromint. */
1420 COSTS_N_INSNS (3) /* roundint. */
1421 },
1422 /* FP DFmode */
1423 {
1424 COSTS_N_INSNS (30), /* div. */
1425 COSTS_N_INSNS (6), /* mult. */
1426 COSTS_N_INSNS (10), /* mult_addsub. */
1427 COSTS_N_INSNS (7), /* fma. */
1428 COSTS_N_INSNS (3), /* addsub. */
1429 COSTS_N_INSNS (3), /* fpconst. */
1430 COSTS_N_INSNS (3), /* neg. */
1431 COSTS_N_INSNS (3), /* compare. */
1432 COSTS_N_INSNS (3), /* widen. */
1433 COSTS_N_INSNS (3), /* narrow. */
1434 COSTS_N_INSNS (3), /* toint. */
1435 COSTS_N_INSNS (3), /* fromint. */
1436 COSTS_N_INSNS (3) /* roundint. */
1437 }
1438 },
1439 /* Vector */
1440 {
1441 COSTS_N_INSNS (1) /* alu. */
1442 }
1443 };
1444
1445 const struct cpu_cost_table cortexa12_extra_costs =
1446 {
1447 /* ALU */
1448 {
1449 0, /* arith. */
1450 0, /* logical. */
1451 0, /* shift. */
1452 COSTS_N_INSNS (1), /* shift_reg. */
1453 COSTS_N_INSNS (1), /* arith_shift. */
1454 COSTS_N_INSNS (1), /* arith_shift_reg. */
1455 COSTS_N_INSNS (1), /* log_shift. */
1456 COSTS_N_INSNS (1), /* log_shift_reg. */
1457 0, /* extend. */
1458 COSTS_N_INSNS (1), /* extend_arith. */
1459 0, /* bfi. */
1460 COSTS_N_INSNS (1), /* bfx. */
1461 COSTS_N_INSNS (1), /* clz. */
1462 COSTS_N_INSNS (1), /* rev. */
1463 0, /* non_exec. */
1464 true /* non_exec_costs_exec. */
1465 },
1466 /* MULT SImode */
1467 {
1468 {
1469 COSTS_N_INSNS (2), /* simple. */
1470 COSTS_N_INSNS (3), /* flag_setting. */
1471 COSTS_N_INSNS (2), /* extend. */
1472 COSTS_N_INSNS (3), /* add. */
1473 COSTS_N_INSNS (2), /* extend_add. */
1474 COSTS_N_INSNS (18) /* idiv. */
1475 },
1476 /* MULT DImode */
1477 {
1478 0, /* simple (N/A). */
1479 0, /* flag_setting (N/A). */
1480 COSTS_N_INSNS (3), /* extend. */
1481 0, /* add (N/A). */
1482 COSTS_N_INSNS (3), /* extend_add. */
1483 0 /* idiv (N/A). */
1484 }
1485 },
1486 /* LD/ST */
1487 {
1488 COSTS_N_INSNS (3), /* load. */
1489 COSTS_N_INSNS (3), /* load_sign_extend. */
1490 COSTS_N_INSNS (3), /* ldrd. */
1491 COSTS_N_INSNS (3), /* ldm_1st. */
1492 1, /* ldm_regs_per_insn_1st. */
1493 2, /* ldm_regs_per_insn_subsequent. */
1494 COSTS_N_INSNS (3), /* loadf. */
1495 COSTS_N_INSNS (3), /* loadd. */
1496 0, /* load_unaligned. */
1497 0, /* store. */
1498 0, /* strd. */
1499 0, /* stm_1st. */
1500 1, /* stm_regs_per_insn_1st. */
1501 2, /* stm_regs_per_insn_subsequent. */
1502 COSTS_N_INSNS (2), /* storef. */
1503 COSTS_N_INSNS (2), /* stored. */
1504 0, /* store_unaligned. */
1505 COSTS_N_INSNS (1), /* loadv. */
1506 COSTS_N_INSNS (1) /* storev. */
1507 },
1508 {
1509 /* FP SFmode */
1510 {
1511 COSTS_N_INSNS (17), /* div. */
1512 COSTS_N_INSNS (4), /* mult. */
1513 COSTS_N_INSNS (8), /* mult_addsub. */
1514 COSTS_N_INSNS (8), /* fma. */
1515 COSTS_N_INSNS (4), /* addsub. */
1516 COSTS_N_INSNS (2), /* fpconst. */
1517 COSTS_N_INSNS (2), /* neg. */
1518 COSTS_N_INSNS (2), /* compare. */
1519 COSTS_N_INSNS (4), /* widen. */
1520 COSTS_N_INSNS (4), /* narrow. */
1521 COSTS_N_INSNS (4), /* toint. */
1522 COSTS_N_INSNS (4), /* fromint. */
1523 COSTS_N_INSNS (4) /* roundint. */
1524 },
1525 /* FP DFmode */
1526 {
1527 COSTS_N_INSNS (31), /* div. */
1528 COSTS_N_INSNS (4), /* mult. */
1529 COSTS_N_INSNS (8), /* mult_addsub. */
1530 COSTS_N_INSNS (8), /* fma. */
1531 COSTS_N_INSNS (4), /* addsub. */
1532 COSTS_N_INSNS (2), /* fpconst. */
1533 COSTS_N_INSNS (2), /* neg. */
1534 COSTS_N_INSNS (2), /* compare. */
1535 COSTS_N_INSNS (4), /* widen. */
1536 COSTS_N_INSNS (4), /* narrow. */
1537 COSTS_N_INSNS (4), /* toint. */
1538 COSTS_N_INSNS (4), /* fromint. */
1539 COSTS_N_INSNS (4) /* roundint. */
1540 }
1541 },
1542 /* Vector */
1543 {
1544 COSTS_N_INSNS (1) /* alu. */
1545 }
1546 };
1547
1548 const struct cpu_cost_table cortexa15_extra_costs =
1549 {
1550 /* ALU */
1551 {
1552 0, /* arith. */
1553 0, /* logical. */
1554 0, /* shift. */
1555 0, /* shift_reg. */
1556 COSTS_N_INSNS (1), /* arith_shift. */
1557 COSTS_N_INSNS (1), /* arith_shift_reg. */
1558 COSTS_N_INSNS (1), /* log_shift. */
1559 COSTS_N_INSNS (1), /* log_shift_reg. */
1560 0, /* extend. */
1561 COSTS_N_INSNS (1), /* extend_arith. */
1562 COSTS_N_INSNS (1), /* bfi. */
1563 0, /* bfx. */
1564 0, /* clz. */
1565 0, /* rev. */
1566 0, /* non_exec. */
1567 true /* non_exec_costs_exec. */
1568 },
1569 /* MULT SImode */
1570 {
1571 {
1572 COSTS_N_INSNS (2), /* simple. */
1573 COSTS_N_INSNS (3), /* flag_setting. */
1574 COSTS_N_INSNS (2), /* extend. */
1575 COSTS_N_INSNS (2), /* add. */
1576 COSTS_N_INSNS (2), /* extend_add. */
1577 COSTS_N_INSNS (18) /* idiv. */
1578 },
1579 /* MULT DImode */
1580 {
1581 0, /* simple (N/A). */
1582 0, /* flag_setting (N/A). */
1583 COSTS_N_INSNS (3), /* extend. */
1584 0, /* add (N/A). */
1585 COSTS_N_INSNS (3), /* extend_add. */
1586 0 /* idiv (N/A). */
1587 }
1588 },
1589 /* LD/ST */
1590 {
1591 COSTS_N_INSNS (3), /* load. */
1592 COSTS_N_INSNS (3), /* load_sign_extend. */
1593 COSTS_N_INSNS (3), /* ldrd. */
1594 COSTS_N_INSNS (4), /* ldm_1st. */
1595 1, /* ldm_regs_per_insn_1st. */
1596 2, /* ldm_regs_per_insn_subsequent. */
1597 COSTS_N_INSNS (4), /* loadf. */
1598 COSTS_N_INSNS (4), /* loadd. */
1599 0, /* load_unaligned. */
1600 0, /* store. */
1601 0, /* strd. */
1602 COSTS_N_INSNS (1), /* stm_1st. */
1603 1, /* stm_regs_per_insn_1st. */
1604 2, /* stm_regs_per_insn_subsequent. */
1605 0, /* storef. */
1606 0, /* stored. */
1607 0, /* store_unaligned. */
1608 COSTS_N_INSNS (1), /* loadv. */
1609 COSTS_N_INSNS (1) /* storev. */
1610 },
1611 {
1612 /* FP SFmode */
1613 {
1614 COSTS_N_INSNS (17), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (5), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 },
1628 /* FP DFmode */
1629 {
1630 COSTS_N_INSNS (31), /* div. */
1631 COSTS_N_INSNS (4), /* mult. */
1632 COSTS_N_INSNS (8), /* mult_addsub. */
1633 COSTS_N_INSNS (8), /* fma. */
1634 COSTS_N_INSNS (4), /* addsub. */
1635 COSTS_N_INSNS (2), /* fpconst. */
1636 COSTS_N_INSNS (2), /* neg. */
1637 COSTS_N_INSNS (2), /* compare. */
1638 COSTS_N_INSNS (4), /* widen. */
1639 COSTS_N_INSNS (4), /* narrow. */
1640 COSTS_N_INSNS (4), /* toint. */
1641 COSTS_N_INSNS (4), /* fromint. */
1642 COSTS_N_INSNS (4) /* roundint. */
1643 }
1644 },
1645 /* Vector */
1646 {
1647 COSTS_N_INSNS (1) /* alu. */
1648 }
1649 };
1650
1651 const struct cpu_cost_table v7m_extra_costs =
1652 {
1653 /* ALU */
1654 {
1655 0, /* arith. */
1656 0, /* logical. */
1657 0, /* shift. */
1658 0, /* shift_reg. */
1659 0, /* arith_shift. */
1660 COSTS_N_INSNS (1), /* arith_shift_reg. */
1661 0, /* log_shift. */
1662 COSTS_N_INSNS (1), /* log_shift_reg. */
1663 0, /* extend. */
1664 COSTS_N_INSNS (1), /* extend_arith. */
1665 0, /* bfi. */
1666 0, /* bfx. */
1667 0, /* clz. */
1668 0, /* rev. */
1669 COSTS_N_INSNS (1), /* non_exec. */
1670 false /* non_exec_costs_exec. */
1671 },
1672 {
1673 /* MULT SImode */
1674 {
1675 COSTS_N_INSNS (1), /* simple. */
1676 COSTS_N_INSNS (1), /* flag_setting. */
1677 COSTS_N_INSNS (2), /* extend. */
1678 COSTS_N_INSNS (1), /* add. */
1679 COSTS_N_INSNS (3), /* extend_add. */
1680 COSTS_N_INSNS (8) /* idiv. */
1681 },
1682 /* MULT DImode */
1683 {
1684 0, /* simple (N/A). */
1685 0, /* flag_setting (N/A). */
1686 COSTS_N_INSNS (2), /* extend. */
1687 0, /* add (N/A). */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 0 /* idiv (N/A). */
1690 }
1691 },
1692 /* LD/ST */
1693 {
1694 COSTS_N_INSNS (2), /* load. */
1695 0, /* load_sign_extend. */
1696 COSTS_N_INSNS (3), /* ldrd. */
1697 COSTS_N_INSNS (2), /* ldm_1st. */
1698 1, /* ldm_regs_per_insn_1st. */
1699 1, /* ldm_regs_per_insn_subsequent. */
1700 COSTS_N_INSNS (2), /* loadf. */
1701 COSTS_N_INSNS (3), /* loadd. */
1702 COSTS_N_INSNS (1), /* load_unaligned. */
1703 COSTS_N_INSNS (2), /* store. */
1704 COSTS_N_INSNS (3), /* strd. */
1705 COSTS_N_INSNS (2), /* stm_1st. */
1706 1, /* stm_regs_per_insn_1st. */
1707 1, /* stm_regs_per_insn_subsequent. */
1708 COSTS_N_INSNS (2), /* storef. */
1709 COSTS_N_INSNS (3), /* stored. */
1710 COSTS_N_INSNS (1), /* store_unaligned. */
1711 COSTS_N_INSNS (1), /* loadv. */
1712 COSTS_N_INSNS (1) /* storev. */
1713 },
1714 {
1715 /* FP SFmode */
1716 {
1717 COSTS_N_INSNS (7), /* div. */
1718 COSTS_N_INSNS (2), /* mult. */
1719 COSTS_N_INSNS (5), /* mult_addsub. */
1720 COSTS_N_INSNS (3), /* fma. */
1721 COSTS_N_INSNS (1), /* addsub. */
1722 0, /* fpconst. */
1723 0, /* neg. */
1724 0, /* compare. */
1725 0, /* widen. */
1726 0, /* narrow. */
1727 0, /* toint. */
1728 0, /* fromint. */
1729 0 /* roundint. */
1730 },
1731 /* FP DFmode */
1732 {
1733 COSTS_N_INSNS (15), /* div. */
1734 COSTS_N_INSNS (5), /* mult. */
1735 COSTS_N_INSNS (7), /* mult_addsub. */
1736 COSTS_N_INSNS (7), /* fma. */
1737 COSTS_N_INSNS (3), /* addsub. */
1738 0, /* fpconst. */
1739 0, /* neg. */
1740 0, /* compare. */
1741 0, /* widen. */
1742 0, /* narrow. */
1743 0, /* toint. */
1744 0, /* fromint. */
1745 0 /* roundint. */
1746 }
1747 },
1748 /* Vector */
1749 {
1750 COSTS_N_INSNS (1) /* alu. */
1751 }
1752 };
1753
1754 const struct tune_params arm_slowmul_tune =
1755 {
1756 &generic_extra_costs, /* Insn extra costs. */
1757 NULL, /* Sched adj cost. */
1758 arm_default_branch_cost,
1759 &arm_default_vec_cost,
1760 3, /* Constant limit. */
1761 5, /* Max cond insns. */
1762 8, /* Memset max inline. */
1763 1, /* Issue rate. */
1764 ARM_PREFETCH_NOT_BENEFICIAL,
1765 tune_params::PREF_CONST_POOL_TRUE,
1766 tune_params::PREF_LDRD_FALSE,
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1769 tune_params::DISPARAGE_FLAGS_NEITHER,
1770 tune_params::PREF_NEON_64_FALSE,
1771 tune_params::PREF_NEON_STRINGOPS_FALSE,
1772 tune_params::FUSE_NOTHING,
1773 tune_params::SCHED_AUTOPREF_OFF
1774 };
1775
1776 const struct tune_params arm_fastmul_tune =
1777 {
1778 &generic_extra_costs, /* Insn extra costs. */
1779 NULL, /* Sched adj cost. */
1780 arm_default_branch_cost,
1781 &arm_default_vec_cost,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL,
1787 tune_params::PREF_CONST_POOL_TRUE,
1788 tune_params::PREF_LDRD_FALSE,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER,
1792 tune_params::PREF_NEON_64_FALSE,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE,
1794 tune_params::FUSE_NOTHING,
1795 tune_params::SCHED_AUTOPREF_OFF
1796 };
1797
1798 /* StrongARM has early execution of branches, so a sequence that is worth
1799 skipping is shorter. Set max_insns_skipped to a lower value. */
1800
1801 const struct tune_params arm_strongarm_tune =
1802 {
1803 &generic_extra_costs, /* Insn extra costs. */
1804 NULL, /* Sched adj cost. */
1805 arm_default_branch_cost,
1806 &arm_default_vec_cost,
1807 1, /* Constant limit. */
1808 3, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 1, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 tune_params::PREF_CONST_POOL_TRUE,
1813 tune_params::PREF_LDRD_FALSE,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER,
1817 tune_params::PREF_NEON_64_FALSE,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE,
1819 tune_params::FUSE_NOTHING,
1820 tune_params::SCHED_AUTOPREF_OFF
1821 };
1822
1823 const struct tune_params arm_xscale_tune =
1824 {
1825 &generic_extra_costs, /* Insn extra costs. */
1826 xscale_sched_adjust_cost,
1827 arm_default_branch_cost,
1828 &arm_default_vec_cost,
1829 2, /* Constant limit. */
1830 3, /* Max cond insns. */
1831 8, /* Memset max inline. */
1832 1, /* Issue rate. */
1833 ARM_PREFETCH_NOT_BENEFICIAL,
1834 tune_params::PREF_CONST_POOL_TRUE,
1835 tune_params::PREF_LDRD_FALSE,
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1838 tune_params::DISPARAGE_FLAGS_NEITHER,
1839 tune_params::PREF_NEON_64_FALSE,
1840 tune_params::PREF_NEON_STRINGOPS_FALSE,
1841 tune_params::FUSE_NOTHING,
1842 tune_params::SCHED_AUTOPREF_OFF
1843 };
1844
1845 const struct tune_params arm_9e_tune =
1846 {
1847 &generic_extra_costs, /* Insn extra costs. */
1848 NULL, /* Sched adj cost. */
1849 arm_default_branch_cost,
1850 &arm_default_vec_cost,
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 8, /* Memset max inline. */
1854 1, /* Issue rate. */
1855 ARM_PREFETCH_NOT_BENEFICIAL,
1856 tune_params::PREF_CONST_POOL_TRUE,
1857 tune_params::PREF_LDRD_FALSE,
1858 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1859 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1860 tune_params::DISPARAGE_FLAGS_NEITHER,
1861 tune_params::PREF_NEON_64_FALSE,
1862 tune_params::PREF_NEON_STRINGOPS_FALSE,
1863 tune_params::FUSE_NOTHING,
1864 tune_params::SCHED_AUTOPREF_OFF
1865 };
1866
1867 const struct tune_params arm_marvell_pj4_tune =
1868 {
1869 &generic_extra_costs, /* Insn extra costs. */
1870 NULL, /* Sched adj cost. */
1871 arm_default_branch_cost,
1872 &arm_default_vec_cost,
1873 1, /* Constant limit. */
1874 5, /* Max cond insns. */
1875 8, /* Memset max inline. */
1876 2, /* Issue rate. */
1877 ARM_PREFETCH_NOT_BENEFICIAL,
1878 tune_params::PREF_CONST_POOL_TRUE,
1879 tune_params::PREF_LDRD_FALSE,
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1881 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1882 tune_params::DISPARAGE_FLAGS_NEITHER,
1883 tune_params::PREF_NEON_64_FALSE,
1884 tune_params::PREF_NEON_STRINGOPS_FALSE,
1885 tune_params::FUSE_NOTHING,
1886 tune_params::SCHED_AUTOPREF_OFF
1887 };
1888
1889 const struct tune_params arm_v6t2_tune =
1890 {
1891 &generic_extra_costs, /* Insn extra costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_FALSE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911
1912 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1913 const struct tune_params arm_cortex_tune =
1914 {
1915 &generic_extra_costs,
1916 NULL, /* Sched adj cost. */
1917 arm_default_branch_cost,
1918 &arm_default_vec_cost,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL,
1924 tune_params::PREF_CONST_POOL_FALSE,
1925 tune_params::PREF_LDRD_FALSE,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER,
1929 tune_params::PREF_NEON_64_FALSE,
1930 tune_params::PREF_NEON_STRINGOPS_FALSE,
1931 tune_params::FUSE_NOTHING,
1932 tune_params::SCHED_AUTOPREF_OFF
1933 };
1934
1935 const struct tune_params arm_cortex_a8_tune =
1936 {
1937 &cortexa8_extra_costs,
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 2, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_TRUE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_cortex_a7_tune =
1958 {
1959 &cortexa7_extra_costs,
1960 NULL, /* Sched adj cost. */
1961 arm_default_branch_cost,
1962 &arm_default_vec_cost,
1963 1, /* Constant limit. */
1964 5, /* Max cond insns. */
1965 8, /* Memset max inline. */
1966 2, /* Issue rate. */
1967 ARM_PREFETCH_NOT_BENEFICIAL,
1968 tune_params::PREF_CONST_POOL_FALSE,
1969 tune_params::PREF_LDRD_FALSE,
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1972 tune_params::DISPARAGE_FLAGS_NEITHER,
1973 tune_params::PREF_NEON_64_FALSE,
1974 tune_params::PREF_NEON_STRINGOPS_TRUE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_cortex_a15_tune =
1980 {
1981 &cortexa15_extra_costs,
1982 NULL, /* Sched adj cost. */
1983 arm_default_branch_cost,
1984 &arm_default_vec_cost,
1985 1, /* Constant limit. */
1986 2, /* Max cond insns. */
1987 8, /* Memset max inline. */
1988 3, /* Issue rate. */
1989 ARM_PREFETCH_NOT_BENEFICIAL,
1990 tune_params::PREF_CONST_POOL_FALSE,
1991 tune_params::PREF_LDRD_TRUE,
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1994 tune_params::DISPARAGE_FLAGS_ALL,
1995 tune_params::PREF_NEON_64_FALSE,
1996 tune_params::PREF_NEON_STRINGOPS_TRUE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_FULL
1999 };
2000
2001 const struct tune_params arm_cortex_a35_tune =
2002 {
2003 &cortexa53_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 arm_default_branch_cost,
2006 &arm_default_vec_cost,
2007 1, /* Constant limit. */
2008 5, /* Max cond insns. */
2009 8, /* Memset max inline. */
2010 1, /* Issue rate. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 tune_params::PREF_CONST_POOL_FALSE,
2013 tune_params::PREF_LDRD_FALSE,
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2016 tune_params::DISPARAGE_FLAGS_NEITHER,
2017 tune_params::PREF_NEON_64_FALSE,
2018 tune_params::PREF_NEON_STRINGOPS_TRUE,
2019 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_cortex_a53_tune =
2024 {
2025 &cortexa53_extra_costs,
2026 NULL, /* Sched adj cost. */
2027 arm_default_branch_cost,
2028 &arm_default_vec_cost,
2029 1, /* Constant limit. */
2030 5, /* Max cond insns. */
2031 8, /* Memset max inline. */
2032 2, /* Issue rate. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 tune_params::PREF_CONST_POOL_FALSE,
2035 tune_params::PREF_LDRD_FALSE,
2036 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2038 tune_params::DISPARAGE_FLAGS_NEITHER,
2039 tune_params::PREF_NEON_64_FALSE,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045 const struct tune_params arm_cortex_a57_tune =
2046 {
2047 &cortexa57_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 arm_default_branch_cost,
2050 &arm_default_vec_cost,
2051 1, /* Constant limit. */
2052 2, /* Max cond insns. */
2053 8, /* Memset max inline. */
2054 3, /* Issue rate. */
2055 ARM_PREFETCH_NOT_BENEFICIAL,
2056 tune_params::PREF_CONST_POOL_FALSE,
2057 tune_params::PREF_LDRD_TRUE,
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2060 tune_params::DISPARAGE_FLAGS_ALL,
2061 tune_params::PREF_NEON_64_FALSE,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE,
2063 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2064 tune_params::SCHED_AUTOPREF_FULL
2065 };
2066
2067 const struct tune_params arm_exynosm1_tune =
2068 {
2069 &exynosm1_extra_costs,
2070 NULL, /* Sched adj cost. */
2071 arm_default_branch_cost,
2072 &arm_default_vec_cost,
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 3, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL,
2078 tune_params::PREF_CONST_POOL_FALSE,
2079 tune_params::PREF_LDRD_TRUE,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL,
2083 tune_params::PREF_NEON_64_FALSE,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 tune_params::FUSE_NOTHING,
2086 tune_params::SCHED_AUTOPREF_OFF
2087 };
2088
2089 const struct tune_params arm_xgene1_tune =
2090 {
2091 &xgene1_extra_costs,
2092 NULL, /* Sched adj cost. */
2093 arm_default_branch_cost,
2094 &arm_default_vec_cost,
2095 1, /* Constant limit. */
2096 2, /* Max cond insns. */
2097 32, /* Memset max inline. */
2098 4, /* Issue rate. */
2099 ARM_PREFETCH_NOT_BENEFICIAL,
2100 tune_params::PREF_CONST_POOL_FALSE,
2101 tune_params::PREF_LDRD_TRUE,
2102 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2104 tune_params::DISPARAGE_FLAGS_ALL,
2105 tune_params::PREF_NEON_64_FALSE,
2106 tune_params::PREF_NEON_STRINGOPS_FALSE,
2107 tune_params::FUSE_NOTHING,
2108 tune_params::SCHED_AUTOPREF_OFF
2109 };
2110
2111 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2112 less appealing. Set max_insns_skipped to a low value. */
2113
2114 const struct tune_params arm_cortex_a5_tune =
2115 {
2116 &cortexa5_extra_costs,
2117 NULL, /* Sched adj cost. */
2118 arm_cortex_a5_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 1, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 2, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_FALSE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_NEITHER,
2130 tune_params::PREF_NEON_64_FALSE,
2131 tune_params::PREF_NEON_STRINGOPS_TRUE,
2132 tune_params::FUSE_NOTHING,
2133 tune_params::SCHED_AUTOPREF_OFF
2134 };
2135
2136 const struct tune_params arm_cortex_a9_tune =
2137 {
2138 &cortexa9_extra_costs,
2139 cortex_a9_sched_adjust_cost,
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 2, /* Issue rate. */
2146 ARM_PREFETCH_BENEFICIAL(4,32,32),
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_64_FALSE,
2153 tune_params::PREF_NEON_STRINGOPS_FALSE,
2154 tune_params::FUSE_NOTHING,
2155 tune_params::SCHED_AUTOPREF_OFF
2156 };
2157
2158 const struct tune_params arm_cortex_a12_tune =
2159 {
2160 &cortexa12_extra_costs,
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost, /* Vectorizer costs. */
2164 1, /* Constant limit. */
2165 2, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_TRUE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_ALL,
2174 tune_params::PREF_NEON_64_FALSE,
2175 tune_params::PREF_NEON_STRINGOPS_TRUE,
2176 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2177 tune_params::SCHED_AUTOPREF_OFF
2178 };
2179
2180 const struct tune_params arm_cortex_a73_tune =
2181 {
2182 &cortexa57_extra_costs,
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost, /* Vectorizer costs. */
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 2, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_64_FALSE,
2197 tune_params::PREF_NEON_STRINGOPS_TRUE,
2198 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2199 tune_params::SCHED_AUTOPREF_FULL
2200 };
2201
2202 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a single
2203 cycle, so materialising a constant with a MOVW/MOVT pair costs two cycles. An LDR
2204 from the constant pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2205 loads/stores can be pipelined together, saving one cycle), and may also
2206 improve icache utilisation. Hence we prefer the constant pool for such
2207 processors. */
2208
2209 const struct tune_params arm_v7m_tune =
2210 {
2211 &v7m_extra_costs,
2212 NULL, /* Sched adj cost. */
2213 arm_cortex_m_branch_cost,
2214 &arm_default_vec_cost,
2215 1, /* Constant limit. */
2216 2, /* Max cond insns. */
2217 8, /* Memset max inline. */
2218 1, /* Issue rate. */
2219 ARM_PREFETCH_NOT_BENEFICIAL,
2220 tune_params::PREF_CONST_POOL_TRUE,
2221 tune_params::PREF_LDRD_FALSE,
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2223 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2224 tune_params::DISPARAGE_FLAGS_NEITHER,
2225 tune_params::PREF_NEON_64_FALSE,
2226 tune_params::PREF_NEON_STRINGOPS_FALSE,
2227 tune_params::FUSE_NOTHING,
2228 tune_params::SCHED_AUTOPREF_OFF
2229 };
2230
2231 /* Cortex-M7 tuning. */
2232
2233 const struct tune_params arm_cortex_m7_tune =
2234 {
2235 &v7m_extra_costs,
2236 NULL, /* Sched adj cost. */
2237 arm_cortex_m7_branch_cost,
2238 &arm_default_vec_cost,
2239 0, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL,
2244 tune_params::PREF_CONST_POOL_TRUE,
2245 tune_params::PREF_LDRD_FALSE,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER,
2249 tune_params::PREF_NEON_64_FALSE,
2250 tune_params::PREF_NEON_STRINGOPS_FALSE,
2251 tune_params::FUSE_NOTHING,
2252 tune_params::SCHED_AUTOPREF_OFF
2253 };
2254
2255 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2256 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2257 cortex-m23. */
2258 const struct tune_params arm_v6m_tune =
2259 {
2260 &generic_extra_costs, /* Insn extra costs. */
2261 NULL, /* Sched adj cost. */
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost, /* Vectorizer costs. */
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 1, /* Issue rate. */
2268 ARM_PREFETCH_NOT_BENEFICIAL,
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_64_FALSE,
2275 tune_params::PREF_NEON_STRINGOPS_FALSE,
2276 tune_params::FUSE_NOTHING,
2277 tune_params::SCHED_AUTOPREF_OFF
2278 };
2279
2280 const struct tune_params arm_fa726te_tune =
2281 {
2282 &generic_extra_costs, /* Insn extra costs. */
2283 fa726te_sched_adjust_cost,
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost,
2286 1, /* Constant limit. */
2287 5, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_TRUE,
2292 tune_params::PREF_LDRD_FALSE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_NEITHER,
2296 tune_params::PREF_NEON_64_FALSE,
2297 tune_params::PREF_NEON_STRINGOPS_FALSE,
2298 tune_params::FUSE_NOTHING,
2299 tune_params::SCHED_AUTOPREF_OFF
2300 };
2301
2302 /* Auto-generated CPU, FPU and architecture tables. */
2303 #include "arm-cpu-data.h"
2304
2305 /* The name of the preprocessor macro to define for this architecture. PROFILE
2306 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2307 is thus chosen to be big enough to hold the longest architecture name. */
2308
2309 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2310
2311 /* Supported TLS relocations. */
2312
2313 enum tls_reloc {
2314 TLS_GD32,
2315 TLS_LDM32,
2316 TLS_LDO32,
2317 TLS_IE32,
2318 TLS_LE32,
2319 TLS_DESCSEQ /* GNU scheme */
2320 };
2321
2322 /* The maximum number of insns to be used when loading a constant. */
2323 inline static int
2324 arm_constant_limit (bool size_p)
2325 {
2326 return size_p ? 1 : current_tune->constant_limit;
2327 }
2328
2329 /* Emit an insn that's a simple single-set. Both the operands must be known
2330 to be valid. */
2331 inline static rtx_insn *
2332 emit_set_insn (rtx x, rtx y)
2333 {
2334 return emit_insn (gen_rtx_SET (x, y));
2335 }
2336
2337 /* Return the number of bits set in VALUE. */
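/* Illustrative note: the loop below repeatedly clears the lowest set bit
   (value &= value - 1), so e.g. 0x29 (binary 101001) is counted in three
   iterations: 101001 -> 101000 -> 100000 -> 0.  */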
2338 static unsigned
2339 bit_count (unsigned long value)
2340 {
2341 unsigned long count = 0;
2342
2343 while (value)
2344 {
2345 count++;
2346 value &= value - 1; /* Clear the least-significant set bit. */
2347 }
2348
2349 return count;
2350 }
2351
2352 /* Return the number of bits set in BMAP. */
2353 static unsigned
2354 bitmap_popcount (const sbitmap bmap)
2355 {
2356 unsigned int count = 0;
2357 unsigned int n = 0;
2358 sbitmap_iterator sbi;
2359
2360 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2361 count++;
2362 return count;
2363 }
2364
2365 typedef struct
2366 {
2367 machine_mode mode;
2368 const char *name;
2369 } arm_fixed_mode_set;
2370
2371 /* A small helper for setting fixed-point optab libfuncs. */
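/* Illustrative note: the sprintf below builds names of the form
   __gnu_<func><mode>[<n>]; e.g. arm_set_fixed_optab_libfunc (add_optab,
   E_SQmode, "add", "sq", 3) registers "__gnu_addsq3", while a num_suffix
   of 0 omits the trailing digit.  */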
2372
2373 static void
2374 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2375 const char *funcname, const char *modename,
2376 int num_suffix)
2377 {
2378 char buffer[50];
2379
2380 if (num_suffix == 0)
2381 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2382 else
2383 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2384
2385 set_optab_libfunc (optable, mode, buffer);
2386 }
2387
2388 static void
2389 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2390 machine_mode from, const char *funcname,
2391 const char *toname, const char *fromname)
2392 {
2393 char buffer[50];
2394 const char *maybe_suffix_2 = "";
2395
2396 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2397 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2398 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2399 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2400 maybe_suffix_2 = "2";
2401
2402 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2403 maybe_suffix_2);
2404
2405 set_conv_libfunc (optable, to, from, buffer);
2406 }
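/* Illustrative note for the helper above: converting QQmode to SQmode via
   fract_optab registers "__gnu_fractqqsq2" (both are signed fract modes, so
   the "2" suffix is used), whereas SImode to SAmode registers
   "__gnu_fractsisa" (an integer source mode suppresses the suffix).  */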
2407
2408 /* Set up library functions unique to ARM. */
2409
2410 static void
2411 arm_init_libfuncs (void)
2412 {
2413 /* For Linux, we have access to kernel support for atomic operations. */
2414 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2415 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2416
2417 /* There are no special library functions unless we are using the
2418 ARM BPABI. */
2419 if (!TARGET_BPABI)
2420 return;
2421
2422 /* The functions below are described in Section 4 of the "Run-Time
2423 ABI for the ARM architecture", Version 1.0. */
2424
2425 /* Double-precision floating-point arithmetic. Table 2. */
2426 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2427 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2428 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2429 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2430 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2431
2432 /* Double-precision comparisons. Table 3. */
2433 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2434 set_optab_libfunc (ne_optab, DFmode, NULL);
2435 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2436 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2437 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2438 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2439 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2440
2441 /* Single-precision floating-point arithmetic. Table 4. */
2442 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2443 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2444 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2445 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2446 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2447
2448 /* Single-precision comparisons. Table 5. */
2449 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2450 set_optab_libfunc (ne_optab, SFmode, NULL);
2451 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2452 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2453 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2454 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2455 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2456
2457 /* Floating-point to integer conversions. Table 6. */
2458 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2459 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2460 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2461 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2462 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2463 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2464 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2465 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2466
2467 /* Conversions between floating types. Table 7. */
2468 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2469 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2470
2471 /* Integer to floating-point conversions. Table 8. */
2472 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2473 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2474 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2475 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2476 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2477 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2478 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2479 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2480
2481 /* Long long. Table 9. */
2482 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2483 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2484 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2485 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2486 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2487 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2488 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2489 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2490
2491 /* Integer (32/32->32) division. \S 4.3.1. */
2492 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2493 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2494
2495 /* The divmod functions are designed so that they can be used for
2496 plain division, even though they return both the quotient and the
2497 remainder. The quotient is returned in the usual location (i.e.,
2498 r0 for SImode, {r0, r1} for DImode), just as would be expected
2499 for an ordinary division routine. Because the AAPCS calling
2500 conventions specify that all of { r0, r1, r2, r3 } are
2501 callee-clobbered registers, there is no need to tell the compiler
2502 explicitly that those registers are clobbered by these
2503 routines. */
2504 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2505 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2506
2507 /* For SImode division the ABI provides div-without-mod routines,
2508 which are faster. */
2509 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2510 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2511
2512 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2513 divmod libcalls instead. */
2514 set_optab_libfunc (smod_optab, DImode, NULL);
2515 set_optab_libfunc (umod_optab, DImode, NULL);
2516 set_optab_libfunc (smod_optab, SImode, NULL);
2517 set_optab_libfunc (umod_optab, SImode, NULL);
2518
2519 /* Half-precision float operations. The compiler handles all operations
2520 with NULL libfuncs by converting to SFmode. */
2521 switch (arm_fp16_format)
2522 {
2523 case ARM_FP16_FORMAT_IEEE:
2524 case ARM_FP16_FORMAT_ALTERNATIVE:
2525
2526 /* Conversions. */
2527 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2528 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2529 ? "__gnu_f2h_ieee"
2530 : "__gnu_f2h_alternative"));
2531 set_conv_libfunc (sext_optab, SFmode, HFmode,
2532 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2533 ? "__gnu_h2f_ieee"
2534 : "__gnu_h2f_alternative"));
2535
2536 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_d2h_ieee"
2539 : "__gnu_d2h_alternative"));
2540
2541 /* Arithmetic. */
2542 set_optab_libfunc (add_optab, HFmode, NULL);
2543 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2544 set_optab_libfunc (smul_optab, HFmode, NULL);
2545 set_optab_libfunc (neg_optab, HFmode, NULL);
2546 set_optab_libfunc (sub_optab, HFmode, NULL);
2547
2548 /* Comparisons. */
2549 set_optab_libfunc (eq_optab, HFmode, NULL);
2550 set_optab_libfunc (ne_optab, HFmode, NULL);
2551 set_optab_libfunc (lt_optab, HFmode, NULL);
2552 set_optab_libfunc (le_optab, HFmode, NULL);
2553 set_optab_libfunc (ge_optab, HFmode, NULL);
2554 set_optab_libfunc (gt_optab, HFmode, NULL);
2555 set_optab_libfunc (unord_optab, HFmode, NULL);
2556 break;
2557
2558 default:
2559 break;
2560 }
2561
2562 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2563 {
2564 const arm_fixed_mode_set fixed_arith_modes[] =
2565 {
2566 { E_QQmode, "qq" },
2567 { E_UQQmode, "uqq" },
2568 { E_HQmode, "hq" },
2569 { E_UHQmode, "uhq" },
2570 { E_SQmode, "sq" },
2571 { E_USQmode, "usq" },
2572 { E_DQmode, "dq" },
2573 { E_UDQmode, "udq" },
2574 { E_TQmode, "tq" },
2575 { E_UTQmode, "utq" },
2576 { E_HAmode, "ha" },
2577 { E_UHAmode, "uha" },
2578 { E_SAmode, "sa" },
2579 { E_USAmode, "usa" },
2580 { E_DAmode, "da" },
2581 { E_UDAmode, "uda" },
2582 { E_TAmode, "ta" },
2583 { E_UTAmode, "uta" }
2584 };
2585 const arm_fixed_mode_set fixed_conv_modes[] =
2586 {
2587 { E_QQmode, "qq" },
2588 { E_UQQmode, "uqq" },
2589 { E_HQmode, "hq" },
2590 { E_UHQmode, "uhq" },
2591 { E_SQmode, "sq" },
2592 { E_USQmode, "usq" },
2593 { E_DQmode, "dq" },
2594 { E_UDQmode, "udq" },
2595 { E_TQmode, "tq" },
2596 { E_UTQmode, "utq" },
2597 { E_HAmode, "ha" },
2598 { E_UHAmode, "uha" },
2599 { E_SAmode, "sa" },
2600 { E_USAmode, "usa" },
2601 { E_DAmode, "da" },
2602 { E_UDAmode, "uda" },
2603 { E_TAmode, "ta" },
2604 { E_UTAmode, "uta" },
2605 { E_QImode, "qi" },
2606 { E_HImode, "hi" },
2607 { E_SImode, "si" },
2608 { E_DImode, "di" },
2609 { E_TImode, "ti" },
2610 { E_SFmode, "sf" },
2611 { E_DFmode, "df" }
2612 };
2613 unsigned int i, j;
2614
2615 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2616 {
2617 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2618 "add", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2620 "ssadd", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2622 "usadd", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2624 "sub", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2626 "sssub", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2628 "ussub", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2630 "mul", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2632 "ssmul", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2634 "usmul", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2636 "div", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2638 "udiv", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2640 "ssdiv", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2642 "usdiv", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2644 "neg", fixed_arith_modes[i].name, 2);
2645 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2646 "ssneg", fixed_arith_modes[i].name, 2);
2647 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2648 "usneg", fixed_arith_modes[i].name, 2);
2649 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2650 "ashl", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2652 "ashr", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2654 "lshr", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2656 "ssashl", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2658 "usashl", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2660 "cmp", fixed_arith_modes[i].name, 2);
2661 }
2662
2663 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2664 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2665 {
2666 if (i == j
2667 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2668 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2669 continue;
2670
2671 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2672 fixed_conv_modes[j].mode, "fract",
2673 fixed_conv_modes[i].name,
2674 fixed_conv_modes[j].name);
2675 arm_set_fixed_conv_libfunc (satfract_optab,
2676 fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "satfract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (fractuns_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "fractuns",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (satfractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "satfractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2690 }
2691 }
2692
2693 if (TARGET_AAPCS_BASED)
2694 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2695 }
2696
2697 /* On AAPCS systems, this is the "struct __va_list". */
2698 static GTY(()) tree va_list_type;
2699
2700 /* Return the type to use as __builtin_va_list. */
2701 static tree
2702 arm_build_builtin_va_list (void)
2703 {
2704 tree va_list_name;
2705 tree ap_field;
2706
2707 if (!TARGET_AAPCS_BASED)
2708 return std_build_builtin_va_list ();
2709
2710 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2711 defined as:
2712
2713 struct __va_list
2714 {
2715 void *__ap;
2716 };
2717
2718 The C Library ABI further reinforces this definition in \S
2719 4.1.
2720
2721 We must follow this definition exactly. The structure tag
2722 name is visible in C++ mangled names, and thus forms a part
2723 of the ABI. The field name may be used by people who
2724 #include <stdarg.h>. */
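/* (For reference, the backend mangles this type as "St9__va_list", i.e. as
   if it were declared in namespace std, which is what the ARM C++ ABI
   requires.)  */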
2725 /* Create the type. */
2726 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2727 /* Give it the required name. */
2728 va_list_name = build_decl (BUILTINS_LOCATION,
2729 TYPE_DECL,
2730 get_identifier ("__va_list"),
2731 va_list_type);
2732 DECL_ARTIFICIAL (va_list_name) = 1;
2733 TYPE_NAME (va_list_type) = va_list_name;
2734 TYPE_STUB_DECL (va_list_type) = va_list_name;
2735 /* Create the __ap field. */
2736 ap_field = build_decl (BUILTINS_LOCATION,
2737 FIELD_DECL,
2738 get_identifier ("__ap"),
2739 ptr_type_node);
2740 DECL_ARTIFICIAL (ap_field) = 1;
2741 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2742 TYPE_FIELDS (va_list_type) = ap_field;
2743 /* Compute its layout. */
2744 layout_type (va_list_type);
2745
2746 return va_list_type;
2747 }
2748
2749 /* Return an expression of type "void *" pointing to the next
2750 available argument in a variable-argument list. VALIST is the
2751 user-level va_list object, of type __builtin_va_list. */
2752 static tree
2753 arm_extract_valist_ptr (tree valist)
2754 {
2755 if (TREE_TYPE (valist) == error_mark_node)
2756 return error_mark_node;
2757
2758 /* On an AAPCS target, the pointer is stored within "struct
2759 va_list". */
2760 if (TARGET_AAPCS_BASED)
2761 {
2762 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2763 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2764 valist, ap_field, NULL_TREE);
2765 }
2766
2767 return valist;
2768 }
2769
2770 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2771 static void
2772 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2773 {
2774 valist = arm_extract_valist_ptr (valist);
2775 std_expand_builtin_va_start (valist, nextarg);
2776 }
2777
2778 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2779 static tree
2780 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2781 gimple_seq *post_p)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2785 }
2786
2787 /* Check any incompatible options that the user has specified. */
2788 static void
2789 arm_option_check_internal (struct gcc_options *opts)
2790 {
2791 int flags = opts->x_target_flags;
2792
2793 /* iWMMXt and NEON are incompatible. */
2794 if (TARGET_IWMMXT
2795 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2796 error ("iWMMXt and NEON are incompatible");
2797
2798 /* Make sure that the processor choice does not conflict with any of the
2799 other command line choices. */
2800 if (TARGET_ARM_P (flags)
2801 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2802 error ("target CPU does not support ARM mode");
2803
2804 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2805 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2806 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2807
2808 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2809 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2810
2811 /* If this target is normally configured to use APCS frames, warn if they
2812 are turned off and debugging is turned on. */
2813 if (TARGET_ARM_P (flags)
2814 && write_symbols != NO_DEBUG
2815 && !TARGET_APCS_FRAME
2816 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2817 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2818
2819 /* iWMMXt unsupported under Thumb mode. */
2820 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2821 error ("iWMMXt unsupported under Thumb mode");
2822
2823 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2824 error ("can not use -mtp=cp15 with 16-bit Thumb");
2825
2826 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2827 {
2828 error ("RTP PIC is incompatible with Thumb");
2829 flag_pic = 0;
2830 }
2831
2832 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2833 with MOVT. */
2834 if ((target_pure_code || target_slow_flash_data)
2835 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2836 {
2837 const char *flag = (target_pure_code ? "-mpure-code" :
2838 "-mslow-flash-data");
2839 error ("%s only supports non-pic code on M-profile targets with the "
2840 "MOVT instruction", flag);
2841 }
2842
2843 }
2844
2845 /* Recompute the global settings depending on target attribute options. */
2846
2847 static void
2848 arm_option_params_internal (void)
2849 {
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2852 if (TARGET_THUMB1)
2853 {
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
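/* (Illustrative: a Thumb-1 word load encodes a 5-bit immediate scaled by 4,
   i.e. byte offsets 0..124, which is the 7-bit range referred to above;
   halfword and byte loads scale by 2 and 1 respectively.)  */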
2859 targetm.min_anchor_offset = 0;
2860 targetm.max_anchor_offset = 127;
2861 }
2862 else if (TARGET_THUMB2)
2863 {
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm.min_anchor_offset = -248;
2868 targetm.max_anchor_offset = 4095;
2869 }
2870 else
2871 {
2872 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2873 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2874 }
2875
2876 /* Increase the number of conditional instructions with -Os. */
2877 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2878
2879 /* For THUMB2, we limit the conditional sequence to one IT block. */
2880 if (TARGET_THUMB2)
2881 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2882 }
2883
2884 /* True if -mflip-thumb should next add an attribute for the default
2885 mode, false if it should next add an attribute for the opposite mode. */
2886 static GTY(()) bool thumb_flipper;
2887
2888 /* Options after initial target override. */
2889 static GTY(()) tree init_optimize;
2890
2891 static void
2892 arm_override_options_after_change_1 (struct gcc_options *opts)
2893 {
2894 if (opts->x_align_functions <= 0)
2895 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2896 && opts->x_optimize_size ? 2 : 4;
2897 }
2898
2899 /* Implement targetm.override_options_after_change. */
2900
2901 static void
2902 arm_override_options_after_change (void)
2903 {
2904 arm_configure_build_target (&arm_active_target,
2905 TREE_TARGET_OPTION (target_option_default_node),
2906 &global_options_set, false);
2907
2908 arm_override_options_after_change_1 (&global_options);
2909 }
2910
2911 /* Implement TARGET_OPTION_SAVE. */
2912 static void
2913 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2914 {
2915 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2916 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2917 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2918 }
2919
2920 /* Implement TARGET_OPTION_RESTORE. */
2921 static void
2922 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2923 {
2924 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2925 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2926 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2927 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2928 false);
2929 }
2930
2931 /* Reset options between modes that the user has specified. */
2932 static void
2933 arm_option_override_internal (struct gcc_options *opts,
2934 struct gcc_options *opts_set)
2935 {
2936 arm_override_options_after_change_1 (opts);
2937
2938 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2939 {
2940 /* The default is to enable interworking, so this warning message would
2941 be confusing to users who have just compiled with, e.g., -march=armv3. */
2942 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2943 opts->x_target_flags &= ~MASK_INTERWORK;
2944 }
2945
2946 if (TARGET_THUMB_P (opts->x_target_flags)
2947 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 warning (0, "target CPU does not support THUMB instructions");
2950 opts->x_target_flags &= ~MASK_THUMB;
2951 }
2952
2953 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2954 {
2955 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2956 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 }
2958
2959 /* Callee super interworking implies thumb interworking. Adding
2960 this to the flags here simplifies the logic elsewhere. */
2961 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2962 opts->x_target_flags |= MASK_INTERWORK;
2963
2964 /* Need to remember initial values so combinations of options like
2965 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2966 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2967
2968 if (! opts_set->x_arm_restrict_it)
2969 opts->x_arm_restrict_it = arm_arch8;
2970
2971 /* ARM execution state and M profile don't have [restrict] IT. */
2972 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2973 opts->x_arm_restrict_it = 0;
2974
2975 /* Enable -munaligned-access by default for
2976 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2977 i.e. Thumb2 and ARM state only.
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-based processors.
2980
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors
2984 - ARMv8-M Baseline processors. */
2985
2986 if (! opts_set->x_unaligned_access)
2987 {
2988 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2989 && arm_arch6 && (arm_arch_notm || arm_arch7));
2990 }
2991 else if (opts->x_unaligned_access == 1
2992 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2993 {
2994 warning (0, "target CPU does not support unaligned accesses");
2995 opts->x_unaligned_access = 0;
2996 }
2997
2998 /* Don't warn since it's on by default in -O2. */
2999 if (TARGET_THUMB1_P (opts->x_target_flags))
3000 opts->x_flag_schedule_insns = 0;
3001 else
3002 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3003
3004 /* Disable shrink-wrap when optimizing function for size, since it tends to
3005 generate additional returns. */
3006 if (optimize_function_for_size_p (cfun)
3007 && TARGET_THUMB2_P (opts->x_target_flags))
3008 opts->x_flag_shrink_wrap = false;
3009 else
3010 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3011
3012 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3013 - epilogue_insns - does not accurately model the corresponding insns
3014 emitted in the asm file. In particular, see the comment in thumb_exit
3015 'Find out how many of the (return) argument registers we can corrupt'.
3016 As a consequence, the epilogue may clobber registers without fipa-ra
3017 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3018 TODO: Accurately model clobbers for epilogue_insns and reenable
3019 fipa-ra. */
3020 if (TARGET_THUMB1_P (opts->x_target_flags))
3021 opts->x_flag_ipa_ra = 0;
3022 else
3023 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3024
3025 /* Thumb2 inline assembly code should always use unified syntax.
3026 This will apply to ARM and Thumb1 eventually. */
3027 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3028
3029 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3030 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3031 #endif
3032 }
3033
3034 static sbitmap isa_all_fpubits;
3035 static sbitmap isa_quirkbits;
3036
3037 /* Configure a build target TARGET from the user-specified options OPTS and
3038 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3039 architecture have been specified, but the two are not identical. */
3040 void
3041 arm_configure_build_target (struct arm_build_target *target,
3042 struct cl_target_option *opts,
3043 struct gcc_options *opts_set,
3044 bool warn_compatible)
3045 {
3046 const cpu_option *arm_selected_tune = NULL;
3047 const arch_option *arm_selected_arch = NULL;
3048 const cpu_option *arm_selected_cpu = NULL;
3049 const arm_fpu_desc *arm_selected_fpu = NULL;
3050 const char *tune_opts = NULL;
3051 const char *arch_opts = NULL;
3052 const char *cpu_opts = NULL;
3053
3054 bitmap_clear (target->isa);
3055 target->core_name = NULL;
3056 target->arch_name = NULL;
3057
3058 if (opts_set->x_arm_arch_string)
3059 {
3060 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3061 "-march",
3062 opts->x_arm_arch_string);
3063 arch_opts = strchr (opts->x_arm_arch_string, '+');
3064 }
3065
3066 if (opts_set->x_arm_cpu_string)
3067 {
3068 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3069 opts->x_arm_cpu_string);
3070 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3071 arm_selected_tune = arm_selected_cpu;
3072 /* If taking the tuning from -mcpu, we don't need to rescan the
3073 options for tuning. */
3074 }
3075
3076 if (opts_set->x_arm_tune_string)
3077 {
3078 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3079 opts->x_arm_tune_string);
3080 tune_opts = strchr (opts->x_arm_tune_string, '+');
3081 }
3082
3083 if (arm_selected_arch)
3084 {
3085 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3086 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3087 arch_opts);
3088
3089 if (arm_selected_cpu)
3090 {
3091 auto_sbitmap cpu_isa (isa_num_bits);
3092 auto_sbitmap isa_delta (isa_num_bits);
3093
3094 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3095 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3096 cpu_opts);
3097 bitmap_xor (isa_delta, cpu_isa, target->isa);
3098 /* Ignore any bits that are quirk bits. */
3099 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3100 /* Ignore (for now) any bits that might be set by -mfpu. */
3101 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3102
3103 if (!bitmap_empty_p (isa_delta))
3104 {
3105 if (warn_compatible)
3106 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3107 arm_selected_cpu->common.name,
3108 arm_selected_arch->common.name);
3109 /* -march wins for code generation.
3110 -mcpu wins for default tuning. */
3111 if (!arm_selected_tune)
3112 arm_selected_tune = arm_selected_cpu;
3113
3114 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3115 target->arch_name = arm_selected_arch->common.name;
3116 }
3117 else
3118 {
3119 /* Architecture and CPU are essentially the same.
3120 Prefer the CPU setting. */
3121 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3122 target->core_name = arm_selected_cpu->common.name;
3123 /* Copy the CPU's capabilities, so that we inherit the
3124 appropriate extensions and quirks. */
3125 bitmap_copy (target->isa, cpu_isa);
3126 }
3127 }
3128 else
3129 {
3130 /* Pick a CPU based on the architecture. */
3131 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3132 target->arch_name = arm_selected_arch->common.name;
3133 /* Note: target->core_name is left unset in this path. */
3134 }
3135 }
3136 else if (arm_selected_cpu)
3137 {
3138 target->core_name = arm_selected_cpu->common.name;
3139 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3140 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3141 cpu_opts);
3142 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3143 }
3144 /* If the user did not specify a processor or architecture, choose
3145 one for them. */
3146 else
3147 {
3148 const cpu_option *sel;
3149 auto_sbitmap sought_isa (isa_num_bits);
3150 bitmap_clear (sought_isa);
3151 auto_sbitmap default_isa (isa_num_bits);
3152
3153 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3154 TARGET_CPU_DEFAULT);
3155 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3156 gcc_assert (arm_selected_cpu->common.name);
3157
3158 /* RWE: All of the selection logic below (to the end of this
3159 'if' clause) looks somewhat suspect. It appears to be mostly
3160 there to support forcing thumb support when the default CPU
3161 does not have thumb (somewhat dubious in terms of what the
3162 user might be expecting). I think it should be removed once
3163 support for the pre-thumb era cores is removed. */
3164 sel = arm_selected_cpu;
3165 arm_initialize_isa (default_isa, sel->common.isa_bits);
3166 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3167 cpu_opts);
3168
3169 /* Now check to see if the user has specified any command line
3170 switches that require certain abilities from the cpu. */
3171
3172 if (TARGET_INTERWORK || TARGET_THUMB)
3173 {
3174 bitmap_set_bit (sought_isa, isa_bit_thumb);
3175 bitmap_set_bit (sought_isa, isa_bit_mode32);
3176
3177 /* There are no ARM processors that support both APCS-26 and
3178 interworking. Therefore we forcibly remove MODE26 from
3179 the isa features here (if it was set), so that the
3180 search below will always be able to find a compatible
3181 processor. */
3182 bitmap_clear_bit (default_isa, isa_bit_mode26);
3183 }
3184
3185 /* If there are such requirements and the default CPU does not
3186 satisfy them, we need to run over the complete list of
3187 cores looking for one that is satisfactory. */
3188 if (!bitmap_empty_p (sought_isa)
3189 && !bitmap_subset_p (sought_isa, default_isa))
3190 {
3191 auto_sbitmap candidate_isa (isa_num_bits);
3192 /* We're only interested in a CPU with at least the
3193 capabilities of the default CPU and the required
3194 additional features. */
3195 bitmap_ior (default_isa, default_isa, sought_isa);
3196
3197 /* Try to locate a CPU type that supports all of the abilities
3198 of the default CPU, plus the extra abilities requested by
3199 the user. */
3200 for (sel = all_cores; sel->common.name != NULL; sel++)
3201 {
3202 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3203 /* An exact match? */
3204 if (bitmap_equal_p (default_isa, candidate_isa))
3205 break;
3206 }
3207
3208 if (sel->common.name == NULL)
3209 {
3210 unsigned current_bit_count = isa_num_bits;
3211 const cpu_option *best_fit = NULL;
3212
3213 /* Ideally we would like to issue an error message here
3214 saying that it was not possible to find a CPU compatible
3215 with the default CPU, but which also supports the command
3216 line options specified by the programmer, and so they
3217 ought to use the -mcpu=<name> command line option to
3218 override the default CPU type.
3219
3220 If we cannot find a CPU that has exactly the
3221 characteristics of the default CPU and the given
3222 command line options we scan the array again looking
3223 for a best match. The best match must have at least
3224 the capabilities of the perfect match. */
3225 for (sel = all_cores; sel->common.name != NULL; sel++)
3226 {
3227 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3228
3229 if (bitmap_subset_p (default_isa, candidate_isa))
3230 {
3231 unsigned count;
3232
3233 bitmap_and_compl (candidate_isa, candidate_isa,
3234 default_isa);
3235 count = bitmap_popcount (candidate_isa);
3236
3237 if (count < current_bit_count)
3238 {
3239 best_fit = sel;
3240 current_bit_count = count;
3241 }
3242 }
3243 }
3244
3245 gcc_assert (best_fit);
3246 sel = best_fit;
3247 }
3248 arm_selected_cpu = sel;
3249 }
3250
3251 /* Now we know the CPU, we can finally initialize the target
3252 structure. */
3253 target->core_name = arm_selected_cpu->common.name;
3254 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3255 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3256 cpu_opts);
3257 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3258 }
3259
3260 gcc_assert (arm_selected_cpu);
3261 gcc_assert (arm_selected_arch);
3262
3263 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3264 {
3265 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3266 auto_sbitmap fpu_bits (isa_num_bits);
3267
3268 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3269 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3270 bitmap_ior (target->isa, target->isa, fpu_bits);
3271 }
3272
3273 if (!arm_selected_tune)
3274 arm_selected_tune = arm_selected_cpu;
3275 else /* Validate the features passed to -mtune. */
3276 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3277
3278 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3279
3280 /* Finish initializing the target structure. */
3281 target->arch_pp_name = arm_selected_arch->arch;
3282 target->base_arch = arm_selected_arch->base_arch;
3283 target->profile = arm_selected_arch->profile;
3284
3285 target->tune_flags = tune_data->tune_flags;
3286 target->tune = tune_data->tune;
3287 target->tune_core = tune_data->scheduler;
3288 }
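/* Illustrative sketch of how the option resolution above plays out; the
   exact ISA bits involved are an assumption, not something this file
   guarantees.  For a combination along the lines of

       gcc -march=armv7-a -mcpu=cortex-a15 ...

   the CPU's ISA is likely to carry non-FPU, non-quirk bits (for example
   the integer divide instructions) that the plain architecture lacks, so
   isa_delta is non-empty: a conflict warning is issued, -march wins for
   code generation, and the -mcpu value is retained only as the default
   -mtune selection.  */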
3289
3290 /* Fix up any incompatible options that the user has specified. */
3291 static void
3292 arm_option_override (void)
3293 {
3294 static const enum isa_feature fpu_bitlist[]
3295 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3296 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3297 cl_target_option opts;
3298
3299 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3300 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3301
3302 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3303 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3304
3305 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3306
3307 if (!global_options_set.x_arm_fpu_index)
3308 {
3309 bool ok;
3310 int fpu_index;
3311
3312 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3313 CL_TARGET);
3314 gcc_assert (ok);
3315 arm_fpu_index = (enum fpu_type) fpu_index;
3316 }
3317
3318 cl_target_option_save (&opts, &global_options);
3319 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3320 true);
3321
3322 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3323 SUBTARGET_OVERRIDE_OPTIONS;
3324 #endif
3325
3326 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3327 arm_base_arch = arm_active_target.base_arch;
3328
3329 arm_tune = arm_active_target.tune_core;
3330 tune_flags = arm_active_target.tune_flags;
3331 current_tune = arm_active_target.tune;
3332
3333 /* TBD: Dwarf info for apcs frame is not handled yet. */
3334 if (TARGET_APCS_FRAME)
3335 flag_shrink_wrap = false;
3336
3337 /* BPABI targets use linker tricks to allow interworking on cores
3338 without thumb support. */
3339 if (TARGET_INTERWORK
3340 && !TARGET_BPABI
3341 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3342 {
3343 warning (0, "target CPU does not support interworking");
3344 target_flags &= ~MASK_INTERWORK;
3345 }
3346
3347 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3348 {
3349 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3350 target_flags |= MASK_APCS_FRAME;
3351 }
3352
3353 if (TARGET_POKE_FUNCTION_NAME)
3354 target_flags |= MASK_APCS_FRAME;
3355
3356 if (TARGET_APCS_REENT && flag_pic)
3357 error ("-fpic and -mapcs-reent are incompatible");
3358
3359 if (TARGET_APCS_REENT)
3360 warning (0, "APCS reentrant code not supported. Ignored");
3361
3362 /* Initialize boolean versions of the architectural flags, for use
3363 in the arm.md file. */
3364 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3365 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3366 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3367 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3368 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3369 arm_arch5te = arm_arch5e
3370 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3371 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3372 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3373 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3374 arm_arch6m = arm_arch6 && !arm_arch_notm;
3375 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3376 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3377 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3378 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3379 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3380 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3381 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3382 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3383 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3384 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3385 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3386 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3387 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3388 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3389 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3390 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3391 if (arm_fp16_inst)
3392 {
3393 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3394 error ("selected fp16 options are incompatible");
3395 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3396 }
3397
3398
3399 /* Set up some tuning parameters. */
3400 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3401 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3402 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3403 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3404 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3405 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3406
3407 /* And finally, set up some quirks. */
3408 arm_arch_no_volatile_ce
3409 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3410 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3411 isa_bit_quirk_armv6kz);
3412
3413 /* V5 code we generate is completely interworking capable, so we turn off
3414 TARGET_INTERWORK here to avoid many tests later on. */
3415
3416 /* XXX However, we must pass the right pre-processor defines to CPP
3417 or GLD can get confused. This is a hack. */
3418 if (TARGET_INTERWORK)
3419 arm_cpp_interwork = 1;
3420
3421 if (arm_arch5)
3422 target_flags &= ~MASK_INTERWORK;
3423
3424 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3425 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3426
3427 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3428 error ("iwmmxt abi requires an iwmmxt capable cpu");
3429
3430 /* If soft-float is specified then don't use FPU. */
3431 if (TARGET_SOFT_FLOAT)
3432 arm_fpu_attr = FPU_NONE;
3433 else
3434 arm_fpu_attr = FPU_VFP;
3435
3436 if (TARGET_AAPCS_BASED)
3437 {
3438 if (TARGET_CALLER_INTERWORKING)
3439 error ("AAPCS does not support -mcaller-super-interworking");
3440 else
3441 if (TARGET_CALLEE_INTERWORKING)
3442 error ("AAPCS does not support -mcallee-super-interworking");
3443 }
3444
3445 /* __fp16 support currently assumes the core has ldrh. */
3446 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3447 sorry ("__fp16 and no ldrh");
3448
3449 if (TARGET_AAPCS_BASED)
3450 {
3451 if (arm_abi == ARM_ABI_IWMMXT)
3452 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3453 else if (TARGET_HARD_FLOAT_ABI)
3454 {
3455 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3456 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3457 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3458 }
3459 else
3460 arm_pcs_default = ARM_PCS_AAPCS;
3461 }
3462 else
3463 {
3464 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3465 sorry ("-mfloat-abi=hard and VFP");
3466
3467 if (arm_abi == ARM_ABI_APCS)
3468 arm_pcs_default = ARM_PCS_APCS;
3469 else
3470 arm_pcs_default = ARM_PCS_ATPCS;
3471 }
3472
3473 /* For arm2/3 there is no need to do any scheduling if we are doing
3474 software floating-point. */
3475 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3476 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3477
3478 /* Use the cp15 method if it is available. */
3479 if (target_thread_pointer == TP_AUTO)
3480 {
3481 if (arm_arch6k && !TARGET_THUMB1)
3482 target_thread_pointer = TP_CP15;
3483 else
3484 target_thread_pointer = TP_SOFT;
3485 }
3486
3487 /* Override the default structure alignment for AAPCS ABI. */
3488 if (!global_options_set.x_arm_structure_size_boundary)
3489 {
3490 if (TARGET_AAPCS_BASED)
3491 arm_structure_size_boundary = 8;
3492 }
3493 else
3494 {
3495 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3496
3497 if (arm_structure_size_boundary != 8
3498 && arm_structure_size_boundary != 32
3499 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3500 {
3501 if (ARM_DOUBLEWORD_ALIGN)
3502 warning (0,
3503 "structure size boundary can only be set to 8, 32 or 64");
3504 else
3505 warning (0, "structure size boundary can only be set to 8 or 32");
3506 arm_structure_size_boundary
3507 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3508 }
3509 }
3510
3511 if (TARGET_VXWORKS_RTP)
3512 {
3513 if (!global_options_set.x_arm_pic_data_is_text_relative)
3514 arm_pic_data_is_text_relative = 0;
3515 }
3516 else if (flag_pic
3517 && !arm_pic_data_is_text_relative
3518 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3519 /* When text & data segments don't have a fixed displacement, the
3520 intended use is with a single, read only, pic base register.
3521 Unless the user explicitly requested not to do that, set
3522 it. */
3523 target_flags |= MASK_SINGLE_PIC_BASE;
3524
3525 /* If stack checking is disabled, we can use r10 as the PIC register,
3526 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3527 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3528 {
3529 if (TARGET_VXWORKS_RTP)
3530 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3531 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3532 }
3533
3534 if (flag_pic && TARGET_VXWORKS_RTP)
3535 arm_pic_register = 9;
3536
3537 if (arm_pic_register_string != NULL)
3538 {
3539 int pic_register = decode_reg_name (arm_pic_register_string);
3540
3541 if (!flag_pic)
3542 warning (0, "-mpic-register= is useless without -fpic");
3543
3544 /* Prevent the user from choosing an obviously stupid PIC register. */
3545 else if (pic_register < 0 || call_used_regs[pic_register]
3546 || pic_register == HARD_FRAME_POINTER_REGNUM
3547 || pic_register == STACK_POINTER_REGNUM
3548 || pic_register >= PC_REGNUM
3549 || (TARGET_VXWORKS_RTP
3550 && (unsigned int) pic_register != arm_pic_register))
3551 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3552 else
3553 arm_pic_register = pic_register;
3554 }
3555
3556 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3557 if (fix_cm3_ldrd == 2)
3558 {
3559 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3560 fix_cm3_ldrd = 1;
3561 else
3562 fix_cm3_ldrd = 0;
3563 }
3564
3565 /* Hot/Cold partitioning is not currently supported, since we can't
3566 handle literal pool placement in that case. */
3567 if (flag_reorder_blocks_and_partition)
3568 {
3569 inform (input_location,
3570 "-freorder-blocks-and-partition not supported on this architecture");
3571 flag_reorder_blocks_and_partition = 0;
3572 flag_reorder_blocks = 1;
3573 }
3574
3575 if (flag_pic)
3576 /* Hoisting PIC address calculations more aggressively provides a small,
3577 but measurable, size reduction for PIC code. Therefore, we decrease
3578 the bar for unrestricted expression hoisting to the cost of PIC address
3579 calculation, which is 2 instructions. */
3580 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3581 global_options.x_param_values,
3582 global_options_set.x_param_values);
3583
3584 /* ARM EABI defaults to strict volatile bitfields. */
3585 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3586 && abi_version_at_least(2))
3587 flag_strict_volatile_bitfields = 1;
3588
3589 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and for
3590 which we have deemed it beneficial (signified by setting
3591 prefetch.num_slots to 1 or more). */
3592 if (flag_prefetch_loop_arrays < 0
3593 && HAVE_prefetch
3594 && optimize >= 3
3595 && current_tune->prefetch.num_slots > 0)
3596 flag_prefetch_loop_arrays = 1;
3597
3598 /* Set up parameters to be used in prefetching algorithm. Do not
3599 override the defaults unless we are tuning for a core we have
3600 researched values for. */
3601 if (current_tune->prefetch.num_slots > 0)
3602 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3603 current_tune->prefetch.num_slots,
3604 global_options.x_param_values,
3605 global_options_set.x_param_values);
3606 if (current_tune->prefetch.l1_cache_line_size >= 0)
3607 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3608 current_tune->prefetch.l1_cache_line_size,
3609 global_options.x_param_values,
3610 global_options_set.x_param_values);
3611 if (current_tune->prefetch.l1_cache_size >= 0)
3612 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3613 current_tune->prefetch.l1_cache_size,
3614 global_options.x_param_values,
3615 global_options_set.x_param_values);
3616
3617 /* Use Neon to perform 64-bit operations rather than core
3618 registers. */
3619 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3620 if (use_neon_for_64bits == 1)
3621 prefer_neon_for_64bits = true;
3622
3623 /* Use the alternative scheduling-pressure algorithm by default. */
3624 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3625 global_options.x_param_values,
3626 global_options_set.x_param_values);
3627
3628 /* Look through ready list and all of queue for instructions
3629 relevant for L2 auto-prefetcher. */
3630 int param_sched_autopref_queue_depth;
3631
3632 switch (current_tune->sched_autopref)
3633 {
3634 case tune_params::SCHED_AUTOPREF_OFF:
3635 param_sched_autopref_queue_depth = -1;
3636 break;
3637
3638 case tune_params::SCHED_AUTOPREF_RANK:
3639 param_sched_autopref_queue_depth = 0;
3640 break;
3641
3642 case tune_params::SCHED_AUTOPREF_FULL:
3643 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3644 break;
3645
3646 default:
3647 gcc_unreachable ();
3648 }
3649
3650 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3651 param_sched_autopref_queue_depth,
3652 global_options.x_param_values,
3653 global_options_set.x_param_values);
3654
3655 /* Currently, for slow flash data, we just disable literal pools. We also
3656 disable them for pure-code. */
3657 if (target_slow_flash_data || target_pure_code)
3658 arm_disable_literal_pool = true;
3659
3660 if (use_cmse && !arm_arch_cmse)
3661 error ("target CPU does not support ARMv8-M Security Extensions");
3662
3663 /* Disable scheduling fusion by default if it's not armv7 processor
3664 or doesn't prefer ldrd/strd. */
3665 if (flag_schedule_fusion == 2
3666 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3667 flag_schedule_fusion = 0;
3668
3669 /* Need to remember initial options before they are overriden. */
3670 init_optimize = build_optimization_node (&global_options);
3671
3672 arm_option_override_internal (&global_options, &global_options_set);
3673 arm_option_check_internal (&global_options);
3674 arm_option_params_internal ();
3675
3676 /* Create the default target_options structure. */
3677 target_option_default_node = target_option_current_node
3678 = build_target_option_node (&global_options);
3679
3680 /* Register global variables with the garbage collector. */
3681 arm_add_gc_roots ();
3682
3683 /* Init initial mode for testing. */
3684 thumb_flipper = TARGET_THUMB;
3685 }
3686
3687 static void
3688 arm_add_gc_roots (void)
3689 {
3690 gcc_obstack_init(&minipool_obstack);
3691 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3692 }
3693 \f
3694 /* A table of known ARM exception types.
3695 For use with the interrupt function attribute. */
3696
3697 typedef struct
3698 {
3699 const char *const arg;
3700 const unsigned long return_value;
3701 }
3702 isr_attribute_arg;
3703
3704 static const isr_attribute_arg isr_attribute_args [] =
3705 {
3706 { "IRQ", ARM_FT_ISR },
3707 { "irq", ARM_FT_ISR },
3708 { "FIQ", ARM_FT_FIQ },
3709 { "fiq", ARM_FT_FIQ },
3710 { "ABORT", ARM_FT_ISR },
3711 { "abort", ARM_FT_ISR },
3712 { "ABORT", ARM_FT_ISR },
3713 { "abort", ARM_FT_ISR },
3714 { "UNDEF", ARM_FT_EXCEPTION },
3715 { "undef", ARM_FT_EXCEPTION },
3716 { "SWI", ARM_FT_EXCEPTION },
3717 { "swi", ARM_FT_EXCEPTION },
3718 { NULL, ARM_FT_NORMAL }
3719 };
3720
3721 /* Returns the (interrupt) function type of the current
3722 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3723
3724 static unsigned long
3725 arm_isr_value (tree argument)
3726 {
3727 const isr_attribute_arg * ptr;
3728 const char * arg;
3729
3730 if (!arm_arch_notm)
3731 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3732
3733 /* No argument - default to IRQ. */
3734 if (argument == NULL_TREE)
3735 return ARM_FT_ISR;
3736
3737 /* Get the value of the argument. */
3738 if (TREE_VALUE (argument) == NULL_TREE
3739 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3740 return ARM_FT_UNKNOWN;
3741
3742 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3743
3744 /* Check it against the list of known arguments. */
3745 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3746 if (streq (arg, ptr->arg))
3747 return ptr->return_value;
3748
3749 /* An unrecognized interrupt type. */
3750 return ARM_FT_UNKNOWN;
3751 }
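/* For exposition: the strings in isr_attribute_args above are the values
   accepted as the argument of the "interrupt" (or "isr") function
   attribute, e.g.

       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
       void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   With no argument the handler defaults to an IRQ handler (ARM_FT_ISR),
   and an unrecognized string yields ARM_FT_UNKNOWN.  */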
3752
3753 /* Computes the type of the current function. */
3754
3755 static unsigned long
3756 arm_compute_func_type (void)
3757 {
3758 unsigned long type = ARM_FT_UNKNOWN;
3759 tree a;
3760 tree attr;
3761
3762 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3763
3764 /* Decide if the current function is volatile. Such functions
3765 never return, and many memory cycles can be saved by not storing
3766 register values that will never be needed again. This optimization
3767 was added to speed up context switching in a kernel application. */
3768 if (optimize > 0
3769 && (TREE_NOTHROW (current_function_decl)
3770 || !(flag_unwind_tables
3771 || (flag_exceptions
3772 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3773 && TREE_THIS_VOLATILE (current_function_decl))
3774 type |= ARM_FT_VOLATILE;
3775
3776 if (cfun->static_chain_decl != NULL)
3777 type |= ARM_FT_NESTED;
3778
3779 attr = DECL_ATTRIBUTES (current_function_decl);
3780
3781 a = lookup_attribute ("naked", attr);
3782 if (a != NULL_TREE)
3783 type |= ARM_FT_NAKED;
3784
3785 a = lookup_attribute ("isr", attr);
3786 if (a == NULL_TREE)
3787 a = lookup_attribute ("interrupt", attr);
3788
3789 if (a == NULL_TREE)
3790 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3791 else
3792 type |= arm_isr_value (TREE_VALUE (a));
3793
3794 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3795 type |= ARM_FT_CMSE_ENTRY;
3796
3797 return type;
3798 }
3799
3800 /* Returns the type of the current function. */
3801
3802 unsigned long
3803 arm_current_func_type (void)
3804 {
3805 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3806 cfun->machine->func_type = arm_compute_func_type ();
3807
3808 return cfun->machine->func_type;
3809 }
3810
3811 bool
3812 arm_allocate_stack_slots_for_args (void)
3813 {
3814 /* Naked functions should not allocate stack slots for arguments. */
3815 return !IS_NAKED (arm_current_func_type ());
3816 }
3817
3818 static bool
3819 arm_warn_func_return (tree decl)
3820 {
3821 /* Naked functions are implemented entirely in assembly, including the
3822 return sequence, so suppress warnings about this. */
3823 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3824 }
3825
3826 \f
3827 /* Output assembler code for a block containing the constant parts
3828 of a trampoline, leaving space for the variable parts.
3829
3830 On the ARM (if r8 is the static chain regnum, and remembering that
3831 referencing pc adds an offset of 8), the trampoline looks like:
3832 ldr r8, [pc, #0]
3833 ldr pc, [pc]
3834 .word static chain value
3835 .word function's address
3836 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3837
3838 static void
3839 arm_asm_trampoline_template (FILE *f)
3840 {
3841 fprintf (f, "\t.syntax unified\n");
3842
3843 if (TARGET_ARM)
3844 {
3845 fprintf (f, "\t.arm\n");
3846 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3847 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3848 }
3849 else if (TARGET_THUMB2)
3850 {
3851 fprintf (f, "\t.thumb\n");
3852 /* The Thumb-2 trampoline is similar to the ARM implementation.
3853 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3854 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3855 STATIC_CHAIN_REGNUM, PC_REGNUM);
3856 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3857 }
3858 else
3859 {
3860 ASM_OUTPUT_ALIGN (f, 2);
3861 fprintf (f, "\t.code\t16\n");
3862 fprintf (f, ".Ltrampoline_start:\n");
3863 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3864 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3865 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3866 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3867 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3868 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3869 }
3870 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3871 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3872 }
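/* For exposition: why the ARM-mode template above uses an offset of #0.
   If the first instruction of the trampoline sits at address A, reading
   PC from that instruction yields A + 8, so

       ldr r8, [pc, #0]   @ executed at A, loads the word at A + 8
       ldr pc, [pc, #0]   @ executed at A + 4, loads the word at A + 12

   which are exactly the static chain and function address slots that
   arm_trampoline_init below fills in at offsets 8 and 12.  */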
3873
3874 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3875
3876 static void
3877 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3878 {
3879 rtx fnaddr, mem, a_tramp;
3880
3881 emit_block_move (m_tramp, assemble_trampoline_template (),
3882 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3883
3884 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3885 emit_move_insn (mem, chain_value);
3886
3887 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3888 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3889 emit_move_insn (mem, fnaddr);
3890
3891 a_tramp = XEXP (m_tramp, 0);
3892 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3893 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3894 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3895 }
3896
3897 /* Thumb trampolines should be entered in thumb mode, so set
3898 the bottom bit of the address. */
3899
3900 static rtx
3901 arm_trampoline_adjust_address (rtx addr)
3902 {
3903 if (TARGET_THUMB)
3904 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3905 NULL, 0, OPTAB_LIB_WIDEN);
3906 return addr;
3907 }
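/* For exposition: on ARM, bit 0 of a branch target selects the
   instruction set, so a Thumb trampoline placed at, say, address 0x20000
   must be entered through address 0x20001; the IOR with const1_rtx above
   sets that bit.  */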
3908 \f
3909 /* Return 1 if it is possible to return using a single instruction.
3910 If SIBLING is non-null, this is a test for a return before a sibling
3911 call. SIBLING is the call insn, so we can examine its register usage. */
3912
3913 int
3914 use_return_insn (int iscond, rtx sibling)
3915 {
3916 int regno;
3917 unsigned int func_type;
3918 unsigned long saved_int_regs;
3919 unsigned HOST_WIDE_INT stack_adjust;
3920 arm_stack_offsets *offsets;
3921
3922 /* Never use a return instruction before reload has run. */
3923 if (!reload_completed)
3924 return 0;
3925
3926 func_type = arm_current_func_type ();
3927
3928 /* Naked, volatile and stack alignment functions need special
3929 consideration. */
3930 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3931 return 0;
3932
3933 /* So do interrupt functions that use the frame pointer and Thumb
3934 interrupt functions. */
3935 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3936 return 0;
3937
3938 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3939 && !optimize_function_for_size_p (cfun))
3940 return 0;
3941
3942 offsets = arm_get_frame_offsets ();
3943 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3944
3945 /* As do variadic functions. */
3946 if (crtl->args.pretend_args_size
3947 || cfun->machine->uses_anonymous_args
3948 /* Or if the function calls __builtin_eh_return () */
3949 || crtl->calls_eh_return
3950 /* Or if the function calls alloca */
3951 || cfun->calls_alloca
3952 /* Or if there is a stack adjustment. However, if the stack pointer
3953 is saved on the stack, we can use a pre-incrementing stack load. */
3954 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3955 && stack_adjust == 4))
3956 /* Or if the static chain register was saved above the frame, under the
3957 assumption that the stack pointer isn't saved on the stack. */
3958 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3959 && arm_compute_static_chain_stack_bytes() != 0))
3960 return 0;
3961
3962 saved_int_regs = offsets->saved_regs_mask;
3963
3964 /* Unfortunately, the insn
3965
3966 ldmib sp, {..., sp, ...}
3967
3968 triggers a bug on most SA-110 based devices, such that the stack
3969 pointer won't be correctly restored if the instruction takes a
3970 page fault. We work around this problem by popping r3 along with
3971 the other registers, since that is never slower than executing
3972 another instruction.
3973
3974 We test for !arm_arch5 here, because code for any architecture
3975 less than this could potentially be run on one of the buggy
3976 chips. */
3977 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3978 {
3979 /* Validate that r3 is a call-clobbered register (always true in
3980 the default abi) ... */
3981 if (!call_used_regs[3])
3982 return 0;
3983
3984 /* ... that it isn't being used for a return value ... */
3985 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3986 return 0;
3987
3988 /* ... or for a tail-call argument ... */
3989 if (sibling)
3990 {
3991 gcc_assert (CALL_P (sibling));
3992
3993 if (find_regno_fusage (sibling, USE, 3))
3994 return 0;
3995 }
3996
3997 /* ... and that there are no call-saved registers in r0-r2
3998 (always true in the default ABI). */
3999 if (saved_int_regs & 0x7)
4000 return 0;
4001 }
4002
4003 /* Can't be done if interworking with Thumb, and any registers have been
4004 stacked. */
4005 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4006 return 0;
4007
4008 /* On StrongARM, conditional returns are expensive if they aren't
4009 taken and multiple registers have been stacked. */
4010 if (iscond && arm_tune_strongarm)
4011 {
4012 /* Conditional return when just the LR is stored is a simple
4013 conditional-load instruction, that's not expensive. */
4014 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4015 return 0;
4016
4017 if (flag_pic
4018 && arm_pic_register != INVALID_REGNUM
4019 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4020 return 0;
4021 }
4022
4023 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4024 several instructions if anything needs to be popped. */
4025 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4026 return 0;
4027
4028 /* If there are saved registers but the LR isn't saved, then we need
4029 two instructions for the return. */
4030 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4031 return 0;
4032
4033 /* Can't be done if any of the VFP regs are pushed,
4034 since this also requires an insn. */
4035 if (TARGET_HARD_FLOAT)
4036 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4037 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4038 return 0;
4039
4040 if (TARGET_REALLY_IWMMXT)
4041 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4042 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4043 return 0;
4044
4045 return 1;
4046 }
4047
4048 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4049 shrink-wrapping if possible. This is the case if we need to emit a
4050 prologue, which we can test by looking at the offsets. */
4051 bool
4052 use_simple_return_p (void)
4053 {
4054 arm_stack_offsets *offsets;
4055
4056 /* Note this function can be called before or after reload. */
4057 if (!reload_completed)
4058 arm_compute_frame_layout ();
4059
4060 offsets = arm_get_frame_offsets ();
4061 return offsets->outgoing_args != 0;
4062 }
4063
4064 /* Return TRUE if int I is a valid immediate ARM constant. */
4065
4066 int
4067 const_ok_for_arm (HOST_WIDE_INT i)
4068 {
4069 int lowbit;
4070
4071 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4072 be all zero, or all one. */
4073 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4074 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4075 != ((~(unsigned HOST_WIDE_INT) 0)
4076 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4077 return FALSE;
4078
4079 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4080
4081 /* Fast return for 0 and small values. We must do this for zero, since
4082 the code below can't handle that one case. */
4083 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4084 return TRUE;
4085
4086 /* Get the number of trailing zeros. */
4087 lowbit = ffs((int) i) - 1;
4088
4089 /* Only even shifts are allowed in ARM mode so round down to the
4090 nearest even number. */
4091 if (TARGET_ARM)
4092 lowbit &= ~1;
4093
4094 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4095 return TRUE;
4096
4097 if (TARGET_ARM)
4098 {
4099 /* Allow rotated constants in ARM mode. */
4100 if (lowbit <= 4
4101 && ((i & ~0xc000003f) == 0
4102 || (i & ~0xf000000f) == 0
4103 || (i & ~0xfc000003) == 0))
4104 return TRUE;
4105 }
4106 else if (TARGET_THUMB2)
4107 {
4108 HOST_WIDE_INT v;
4109
4110 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4111 v = i & 0xff;
4112 v |= v << 16;
4113 if (i == v || i == (v | (v << 8)))
4114 return TRUE;
4115
4116 /* Allow repeated pattern 0xXY00XY00. */
4117 v = i & 0xff00;
4118 v |= v << 16;
4119 if (i == v)
4120 return TRUE;
4121 }
4122 else if (TARGET_HAVE_MOVT)
4123 {
4124 /* Thumb-1 Targets with MOVT. */
4125 if (i > 0xffff)
4126 return FALSE;
4127 else
4128 return TRUE;
4129 }
4130
4131 return FALSE;
4132 }
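/* Illustrative sketch, for exposition only (the helper name and the
   assumption of a 32-bit unsigned int are not part of this file): the
   ARM-mode test above accepts any value expressible as an 8-bit constant
   rotated right by an even amount.  A direct, self-contained version of
   that check could look like:

       static int
       arm_mode_immediate_p (unsigned int x)
       {
         unsigned int r;

         for (r = 0; r < 32; r += 2)
           {
             unsigned int rotated = r ? (x << r) | (x >> (32 - r)) : x;
             if (rotated <= 0xff)          rotating X left by R undoes a
               return 1;                   rotate-right of an 8-bit value
           }
         return 0;
       }

   For example, 0x0003FC00 is 0xFF rotated right by 22 and is accepted,
   while 0x0001FE00 would need an odd rotation and is rejected in ARM
   mode (Thumb-2 accepts it, as 0xFF shifted left by 9).  */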
4133
4134 /* Return true if I is a valid constant for the operation CODE. */
4135 int
4136 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4137 {
4138 if (const_ok_for_arm (i))
4139 return 1;
4140
4141 switch (code)
4142 {
4143 case SET:
4144 /* See if we can use movw. */
4145 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4146 return 1;
4147 else
4148 /* Otherwise, try mvn. */
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4150
4151 case PLUS:
4152 /* See if we can use addw or subw. */
4153 if (TARGET_THUMB2
4154 && ((i & 0xfffff000) == 0
4155 || ((-i) & 0xfffff000) == 0))
4156 return 1;
4157 /* Fall through. */
4158 case COMPARE:
4159 case EQ:
4160 case NE:
4161 case GT:
4162 case LE:
4163 case LT:
4164 case GE:
4165 case GEU:
4166 case LTU:
4167 case GTU:
4168 case LEU:
4169 case UNORDERED:
4170 case ORDERED:
4171 case UNEQ:
4172 case UNGE:
4173 case UNLT:
4174 case UNGT:
4175 case UNLE:
4176 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4177
4178 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4179 case XOR:
4180 return 0;
4181
4182 case IOR:
4183 if (TARGET_THUMB2)
4184 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4185 return 0;
4186
4187 case AND:
4188 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4189
4190 default:
4191 gcc_unreachable ();
4192 }
4193 }
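/* For exposition: the negation and inversion cases above mean, for
   instance, that PLUS with the constant 0xFFFFFF00 (-256) is acceptable
   because 256 is a valid immediate (the add can be emitted as a sub),
   and AND with 0xFFFFFF00 is acceptable because its inverse 0xFF is a
   valid immediate (the and can be emitted as a bic).  */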
4194
4195 /* Return true if I is a valid di mode constant for the operation CODE. */
4196 int
4197 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4198 {
4199 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4200 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4201 rtx hi = GEN_INT (hi_val);
4202 rtx lo = GEN_INT (lo_val);
4203
4204 if (TARGET_THUMB1)
4205 return 0;
4206
4207 switch (code)
4208 {
4209 case AND:
4210 case IOR:
4211 case XOR:
4212 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4213 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4214 case PLUS:
4215 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4216
4217 default:
4218 return 0;
4219 }
4220 }
4221
4222 /* Emit a sequence of insns to handle a large constant.
4223 CODE is the code of the operation required, it can be any of SET, PLUS,
4224 IOR, AND, XOR, MINUS;
4225 MODE is the mode in which the operation is being performed;
4226 VAL is the integer to operate on;
4227 SOURCE is the other operand (a register, or a null-pointer for SET);
4228 SUBTARGETS means it is safe to create scratch registers if that will
4229 either produce a simpler sequence, or we will want to cse the values.
4230 Return value is the number of insns emitted. */
4231
4232 /* ??? Tweak this for thumb2. */
4233 int
4234 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4235 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4236 {
4237 rtx cond;
4238
4239 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4240 cond = COND_EXEC_TEST (PATTERN (insn));
4241 else
4242 cond = NULL_RTX;
4243
4244 if (subtargets || code == SET
4245 || (REG_P (target) && REG_P (source)
4246 && REGNO (target) != REGNO (source)))
4247 {
4248 /* After arm_reorg has been called, we can't fix up expensive
4249 constants by pushing them into memory so we must synthesize
4250 them in-line, regardless of the cost. This is only likely to
4251 be more costly on chips that have load delay slots and we are
4252 compiling without running the scheduler (so no splitting
4253 occurred before the final instruction emission).
4254
4255 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4256 */
4257 if (!cfun->machine->after_arm_reorg
4258 && !cond
4259 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4260 1, 0)
4261 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4262 + (code != SET))))
4263 {
4264 if (code == SET)
4265 {
4266 /* Currently SET is the only monadic value for CODE; all
4267 the rest are dyadic. */
4268 if (TARGET_USE_MOVT)
4269 arm_emit_movpair (target, GEN_INT (val));
4270 else
4271 emit_set_insn (target, GEN_INT (val));
4272
4273 return 1;
4274 }
4275 else
4276 {
4277 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4278
4279 if (TARGET_USE_MOVT)
4280 arm_emit_movpair (temp, GEN_INT (val));
4281 else
4282 emit_set_insn (temp, GEN_INT (val));
4283
4284 /* For MINUS, the value is subtracted from, since we never
4285 have subtraction of a constant. */
4286 if (code == MINUS)
4287 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4288 else
4289 emit_set_insn (target,
4290 gen_rtx_fmt_ee (code, mode, source, temp));
4291 return 2;
4292 }
4293 }
4294 }
4295
4296 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4297 1);
4298 }
4299
4300 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4301 ARM/Thumb-2 immediates and add up to VAL.
4302 The function return value gives the number of insns required. */
4303 static int
4304 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4305 struct four_ints *return_sequence)
4306 {
4307 int best_consecutive_zeros = 0;
4308 int i;
4309 int best_start = 0;
4310 int insns1, insns2;
4311 struct four_ints tmp_sequence;
4312
4313 /* If we aren't targeting ARM, the best place to start is always at
4314 the bottom, otherwise look more closely. */
4315 if (TARGET_ARM)
4316 {
4317 for (i = 0; i < 32; i += 2)
4318 {
4319 int consecutive_zeros = 0;
4320
4321 if (!(val & (3 << i)))
4322 {
4323 while ((i < 32) && !(val & (3 << i)))
4324 {
4325 consecutive_zeros += 2;
4326 i += 2;
4327 }
4328 if (consecutive_zeros > best_consecutive_zeros)
4329 {
4330 best_consecutive_zeros = consecutive_zeros;
4331 best_start = i - consecutive_zeros;
4332 }
4333 i -= 2;
4334 }
4335 }
4336 }
4337
4338 /* So long as it won't require any more insns to do so, it's
4339 desirable to emit a small constant (in bits 0...9) in the last
4340 insn. This way there is more chance that it can be combined with
4341 a later addressing insn to form a pre-indexed load or store
4342 operation. Consider:
4343
4344 *((volatile int *)0xe0000100) = 1;
4345 *((volatile int *)0xe0000110) = 2;
4346
4347 We want this to wind up as:
4348
4349 mov rA, #0xe0000000
4350 mov rB, #1
4351 str rB, [rA, #0x100]
4352 mov rB, #2
4353 str rB, [rA, #0x110]
4354
4355 rather than having to synthesize both large constants from scratch.
4356
4357 Therefore, we calculate how many insns would be required to emit
4358 the constant starting from `best_start', and also starting from
4359 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4360 yield a shorter sequence, we may as well use zero. */
4361 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4362 if (best_start != 0
4363 && ((HOST_WIDE_INT_1U << best_start) < val))
4364 {
4365 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4366 if (insns2 <= insns1)
4367 {
4368 *return_sequence = tmp_sequence;
4369 insns1 = insns2;
4370 }
4371 }
4372
4373 return insns1;
4374 }
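/* A worked example of the splitting above, schematic only (register
   names are placeholders, and targets with MOVW/MOVT may do better): in
   ARM mode the value 0x12340000 is not a valid immediate, but it can be
   built from two 8-bit rotated immediates,

       mov     rA, #0x12000000    @ 0x12 rotated right by 8
       add     rA, rA, #0x340000  @ 0x34 rotated right by 16

   so the sequence returned here would contain those two values and the
   insn count would be 2 (the parts are disjoint, so add and orr are
   interchangeable).  */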
4375
4376 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4377 static int
4378 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4379 struct four_ints *return_sequence, int i)
4380 {
4381 int remainder = val & 0xffffffff;
4382 int insns = 0;
4383
4384 /* Try and find a way of doing the job in either two or three
4385 instructions.
4386
4387 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4388 location. We start at position I. This may be the MSB, or
4389 optimal_immediate_sequence may have positioned it at the largest block
4390 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4391 wrapping around to the top of the word when we drop off the bottom.
4392 In the worst case this code should produce no more than four insns.
4393
4394 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4395 constants, shifted to any arbitrary location. We should always start
4396 at the MSB. */
4397 do
4398 {
4399 int end;
4400 unsigned int b1, b2, b3, b4;
4401 unsigned HOST_WIDE_INT result;
4402 int loc;
4403
4404 gcc_assert (insns < 4);
4405
4406 if (i <= 0)
4407 i += 32;
4408
4409 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4410 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4411 {
4412 loc = i;
4413 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4414 /* We can use addw/subw for the last 12 bits. */
4415 result = remainder;
4416 else
4417 {
4418 /* Use an 8-bit shifted/rotated immediate. */
4419 end = i - 8;
4420 if (end < 0)
4421 end += 32;
4422 result = remainder & ((0x0ff << end)
4423 | ((i < end) ? (0xff >> (32 - end))
4424 : 0));
4425 i -= 8;
4426 }
4427 }
4428 else
4429 {
4430 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4431 arbitrary shifts. */
4432 i -= TARGET_ARM ? 2 : 1;
4433 continue;
4434 }
4435
4436 /* Next, see if we can do a better job with a thumb2 replicated
4437 constant.
4438
4439 We do it this way around to catch the cases like 0x01F001E0 where
4440 two 8-bit immediates would work, but a replicated constant would
4441 make it worse.
4442
4443 TODO: 16-bit constants that don't clear all the bits, but still win.
4444 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4445 if (TARGET_THUMB2)
4446 {
4447 b1 = (remainder & 0xff000000) >> 24;
4448 b2 = (remainder & 0x00ff0000) >> 16;
4449 b3 = (remainder & 0x0000ff00) >> 8;
4450 b4 = remainder & 0xff;
4451
4452 if (loc > 24)
4453 {
4454 /* The 8-bit immediate already found clears b1 (and maybe b2),
4455 but must leave b3 and b4 alone. */
4456
4457 /* First try to find a 32-bit replicated constant that clears
4458 almost everything. We can assume that we can't do it in one,
4459 or else we wouldn't be here. */
4460 unsigned int tmp = b1 & b2 & b3 & b4;
4461 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4462 + (tmp << 24);
4463 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4464 + (tmp == b3) + (tmp == b4);
4465 if (tmp
4466 && (matching_bytes >= 3
4467 || (matching_bytes == 2
4468 && const_ok_for_op (remainder & ~tmp2, code))))
4469 {
4470 /* At least 3 of the bytes match, and the fourth has at
4471 least as many bits set, or two of the bytes match
4472 and it will only require one more insn to finish. */
4473 result = tmp2;
4474 i = tmp != b1 ? 32
4475 : tmp != b2 ? 24
4476 : tmp != b3 ? 16
4477 : 8;
4478 }
4479
4480 /* Second, try to find a 16-bit replicated constant that can
4481 leave three of the bytes clear. If b2 or b4 is already
4482 zero, then we can. If the 8-bit from above would not
4483 clear b2 anyway, then we still win. */
4484 else if (b1 == b3 && (!b2 || !b4
4485 || (remainder & 0x00ff0000 & ~result)))
4486 {
4487 result = remainder & 0xff00ff00;
4488 i = 24;
4489 }
4490 }
4491 else if (loc > 16)
4492 {
4493 /* The 8-bit immediate already found clears b2 (and maybe b3)
4494 and we don't get here unless b1 is already clear, but it will
4495 leave b4 unchanged. */
4496
4497 /* If we can clear b2 and b4 at once, then we win, since the
4498 8-bits couldn't possibly reach that far. */
4499 if (b2 == b4)
4500 {
4501 result = remainder & 0x00ff00ff;
4502 i = 16;
4503 }
4504 }
4505 }
4506
4507 return_sequence->i[insns++] = result;
4508 remainder &= ~result;
4509
4510 if (code == SET || code == MINUS)
4511 code = PLUS;
4512 }
4513 while (remainder);
4514
4515 return insns;
4516 }
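/* For exposition, the 0x01F001E0 case mentioned in the comments above,
   shown schematically with placeholder registers: in Thumb-2 it splits
   into two plain 8-bit shifted immediates,

       orr     rX, rY, #0x01F00000  @ 0x1F shifted left by 20
       orr     rX, rX, #0x000001E0  @ 0xF0 shifted left by 1

   whereas starting from a replicated 0x00XY00XY-style constant would make
   things worse, which is why the replicated-constant checks above only
   fire when they clear at least as many bytes as the 8-bit immediate.  */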
4517
4518 /* Emit an instruction with the indicated PATTERN. If COND is
4519 non-NULL, conditionalize the execution of the instruction on COND
4520 being true. */
4521
4522 static void
4523 emit_constant_insn (rtx cond, rtx pattern)
4524 {
4525 if (cond)
4526 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4527 emit_insn (pattern);
4528 }
4529
4530 /* As above, but extra parameter GENERATE which, if clear, suppresses
4531 RTL generation. */
4532
4533 static int
4534 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4535 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4536 int subtargets, int generate)
4537 {
4538 int can_invert = 0;
4539 int can_negate = 0;
4540 int final_invert = 0;
4541 int i;
4542 int set_sign_bit_copies = 0;
4543 int clear_sign_bit_copies = 0;
4544 int clear_zero_bit_copies = 0;
4545 int set_zero_bit_copies = 0;
4546 int insns = 0, neg_insns, inv_insns;
4547 unsigned HOST_WIDE_INT temp1, temp2;
4548 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4549 struct four_ints *immediates;
4550 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4551
4552 /* Find out which operations are safe for a given CODE. Also do a quick
4553 check for degenerate cases; these can occur when DImode operations
4554 are split. */
4555 switch (code)
4556 {
4557 case SET:
4558 can_invert = 1;
4559 break;
4560
4561 case PLUS:
4562 can_negate = 1;
4563 break;
4564
4565 case IOR:
4566 if (remainder == 0xffffffff)
4567 {
4568 if (generate)
4569 emit_constant_insn (cond,
4570 gen_rtx_SET (target,
4571 GEN_INT (ARM_SIGN_EXTEND (val))));
4572 return 1;
4573 }
4574
4575 if (remainder == 0)
4576 {
4577 if (reload_completed && rtx_equal_p (target, source))
4578 return 0;
4579
4580 if (generate)
4581 emit_constant_insn (cond, gen_rtx_SET (target, source));
4582 return 1;
4583 }
4584 break;
4585
4586 case AND:
4587 if (remainder == 0)
4588 {
4589 if (generate)
4590 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4591 return 1;
4592 }
4593 if (remainder == 0xffffffff)
4594 {
4595 if (reload_completed && rtx_equal_p (target, source))
4596 return 0;
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4600 }
4601 can_invert = 1;
4602 break;
4603
4604 case XOR:
4605 if (remainder == 0)
4606 {
4607 if (reload_completed && rtx_equal_p (target, source))
4608 return 0;
4609 if (generate)
4610 emit_constant_insn (cond, gen_rtx_SET (target, source));
4611 return 1;
4612 }
4613
4614 if (remainder == 0xffffffff)
4615 {
4616 if (generate)
4617 emit_constant_insn (cond,
4618 gen_rtx_SET (target,
4619 gen_rtx_NOT (mode, source)));
4620 return 1;
4621 }
4622 final_invert = 1;
4623 break;
4624
4625 case MINUS:
4626 /* We treat MINUS as (val - source), since (source - val) is always
4627 passed as (source + (-val)). */
4628 if (remainder == 0)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NEG (mode, source)));
4634 return 1;
4635 }
4636 if (const_ok_for_arm (val))
4637 {
4638 if (generate)
4639 emit_constant_insn (cond,
4640 gen_rtx_SET (target,
4641 gen_rtx_MINUS (mode, GEN_INT (val),
4642 source)));
4643 return 1;
4644 }
4645
4646 break;
4647
4648 default:
4649 gcc_unreachable ();
4650 }
4651
4652 /* If we can do it in one insn get out quickly. */
4653 if (const_ok_for_op (val, code))
4654 {
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 (source
4659 ? gen_rtx_fmt_ee (code, mode, source,
4660 GEN_INT (val))
4661 : GEN_INT (val))));
4662 return 1;
4663 }
4664
4665 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4666 insn. */
4667 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4668 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4669 {
4670 if (generate)
4671 {
4672 if (mode == SImode && i == 16)
4673 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4674 smaller insn. */
4675 emit_constant_insn (cond,
4676 gen_zero_extendhisi2
4677 (target, gen_lowpart (HImode, source)));
4678 else
4679 /* Extz only supports SImode, but we can coerce the operands
4680 into that mode. */
4681 emit_constant_insn (cond,
4682 gen_extzv_t2 (gen_lowpart (SImode, target),
4683 gen_lowpart (SImode, source),
4684 GEN_INT (i), const0_rtx));
4685 }
4686
4687 return 1;
4688 }
4689
4690 /* Calculate a few attributes that may be useful for specific
4691 optimizations. */
4692 /* Count number of leading zeros. */
4693 for (i = 31; i >= 0; i--)
4694 {
4695 if ((remainder & (1 << i)) == 0)
4696 clear_sign_bit_copies++;
4697 else
4698 break;
4699 }
4700
4701 /* Count number of leading 1's. */
4702 for (i = 31; i >= 0; i--)
4703 {
4704 if ((remainder & (1 << i)) != 0)
4705 set_sign_bit_copies++;
4706 else
4707 break;
4708 }
4709
4710 /* Count number of trailing zero's. */
4711 for (i = 0; i <= 31; i++)
4712 {
4713 if ((remainder & (1 << i)) == 0)
4714 clear_zero_bit_copies++;
4715 else
4716 break;
4717 }
4718
4719 /* Count number of trailing 1's. */
4720 for (i = 0; i <= 31; i++)
4721 {
4722 if ((remainder & (1 << i)) != 0)
4723 set_zero_bit_copies++;
4724 else
4725 break;
4726 }
4727
4728 switch (code)
4729 {
4730 case SET:
4731 /* See if we can do this by sign_extending a constant that is known
4732 to be negative. This is a good way of doing it, since the shift
4733 may well merge into a subsequent insn. */
4734 if (set_sign_bit_copies > 1)
4735 {
4736 if (const_ok_for_arm
4737 (temp1 = ARM_SIGN_EXTEND (remainder
4738 << (set_sign_bit_copies - 1))))
4739 {
4740 if (generate)
4741 {
4742 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4743 emit_constant_insn (cond,
4744 gen_rtx_SET (new_src, GEN_INT (temp1)));
4745 emit_constant_insn (cond,
4746 gen_ashrsi3 (target, new_src,
4747 GEN_INT (set_sign_bit_copies - 1)));
4748 }
4749 return 2;
4750 }
4751 /* For an inverted constant, we will need to set the low bits,
4752 these will be shifted out of harm's way. */
4753 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4754 if (const_ok_for_arm (~temp1))
4755 {
4756 if (generate)
4757 {
4758 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4759 emit_constant_insn (cond,
4760 gen_rtx_SET (new_src, GEN_INT (temp1)));
4761 emit_constant_insn (cond,
4762 gen_ashrsi3 (target, new_src,
4763 GEN_INT (set_sign_bit_copies - 1)));
4764 }
4765 return 2;
4766 }
4767 }
4768
4769 /* See if we can calculate the value as the difference between two
4770 valid immediates. */
4771 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4772 {
4773 int topshift = clear_sign_bit_copies & ~1;
4774
4775 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4776 & (0xff000000 >> topshift));
4777
4778 /* If temp1 is zero, then that means the 9 most significant
4779 bits of remainder were 1 and we've caused it to overflow.
4780 When topshift is 0 we don't need to do anything since we
4781 can borrow from 'bit 32'. */
4782 if (temp1 == 0 && topshift != 0)
4783 temp1 = 0x80000000 >> (topshift - 1);
4784
4785 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4786
4787 if (const_ok_for_arm (temp2))
4788 {
4789 if (generate)
4790 {
4791 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4792 emit_constant_insn (cond,
4793 gen_rtx_SET (new_src, GEN_INT (temp1)));
4794 emit_constant_insn (cond,
4795 gen_addsi3 (target, new_src,
4796 GEN_INT (-temp2)));
4797 }
4798
4799 return 2;
4800 }
4801 }
4802
4803 /* See if we can generate this by setting the bottom (or the top)
4804 16 bits, and then shifting these into the other half of the
4805 word. We only look for the simplest cases, to do more would cost
4806 too much. Be careful, however, not to generate this when the
4807 alternative would take fewer insns. */
4808 if (val & 0xffff0000)
4809 {
4810 temp1 = remainder & 0xffff0000;
4811 temp2 = remainder & 0x0000ffff;
4812
4813 /* Overlaps outside this range are best done using other methods. */
4814 for (i = 9; i < 24; i++)
4815 {
4816 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4817 && !const_ok_for_arm (temp2))
4818 {
4819 rtx new_src = (subtargets
4820 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4821 : target);
4822 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4823 source, subtargets, generate);
4824 source = new_src;
4825 if (generate)
4826 emit_constant_insn
4827 (cond,
4828 gen_rtx_SET
4829 (target,
4830 gen_rtx_IOR (mode,
4831 gen_rtx_ASHIFT (mode, source,
4832 GEN_INT (i)),
4833 source)));
4834 return insns + 1;
4835 }
4836 }
4837
4838 /* Don't duplicate cases already considered. */
4839 for (i = 17; i < 24; i++)
4840 {
4841 if (((temp1 | (temp1 >> i)) == remainder)
4842 && !const_ok_for_arm (temp1))
4843 {
4844 rtx new_src = (subtargets
4845 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4846 : target);
4847 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4848 source, subtargets, generate);
4849 source = new_src;
4850 if (generate)
4851 emit_constant_insn
4852 (cond,
4853 gen_rtx_SET (target,
4854 gen_rtx_IOR
4855 (mode,
4856 gen_rtx_LSHIFTRT (mode, source,
4857 GEN_INT (i)),
4858 source)));
4859 return insns + 1;
4860 }
4861 }
4862 }
4863 break;
4864
4865 case IOR:
4866 case XOR:
4867 /* If we have IOR or XOR, and the constant can be loaded in a
4868 single instruction, and we can find a temporary to put it in,
4869 then this can be done in two instructions instead of 3-4. */
4870 if (subtargets
4871 /* TARGET can't be NULL if SUBTARGETS is 0. */
4872 || (reload_completed && !reg_mentioned_p (target, source)))
4873 {
4874 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4875 {
4876 if (generate)
4877 {
4878 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4879
4880 emit_constant_insn (cond,
4881 gen_rtx_SET (sub, GEN_INT (val)));
4882 emit_constant_insn (cond,
4883 gen_rtx_SET (target,
4884 gen_rtx_fmt_ee (code, mode,
4885 source, sub)));
4886 }
4887 return 2;
4888 }
4889 }
4890
4891 if (code == XOR)
4892 break;
4893
4894 /* Convert
4895 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4896 followed by 0s, e.g. 0xfff00000)
4897 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4898
4899 This can be done in 2 instructions by using shifts with mov or mvn.
4900 e.g. for
4901 x = x | 0xfff00000;
4902 we generate.
4903 mvn r0, r0, asl #12
4904 mvn r0, r0, lsr #12 */
4905 if (set_sign_bit_copies > 8
4906 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4907 {
4908 if (generate)
4909 {
4910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4911 rtx shift = GEN_INT (set_sign_bit_copies);
4912
4913 emit_constant_insn
4914 (cond,
4915 gen_rtx_SET (sub,
4916 gen_rtx_NOT (mode,
4917 gen_rtx_ASHIFT (mode,
4918 source,
4919 shift))));
4920 emit_constant_insn
4921 (cond,
4922 gen_rtx_SET (target,
4923 gen_rtx_NOT (mode,
4924 gen_rtx_LSHIFTRT (mode, sub,
4925 shift))));
4926 }
4927 return 2;
4928 }
4929
4930 /* Convert
4931 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4932 to
4933 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4934
4935 E.g. for r0 = r0 | 0xfff
4936 mvn r0, r0, lsr #12
4937 mvn r0, r0, asl #12
4938
4939 */
4940 if (set_zero_bit_copies > 8
4941 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4942 {
4943 if (generate)
4944 {
4945 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4946 rtx shift = GEN_INT (set_zero_bit_copies);
4947
4948 emit_constant_insn
4949 (cond,
4950 gen_rtx_SET (sub,
4951 gen_rtx_NOT (mode,
4952 gen_rtx_LSHIFTRT (mode,
4953 source,
4954 shift))));
4955 emit_constant_insn
4956 (cond,
4957 gen_rtx_SET (target,
4958 gen_rtx_NOT (mode,
4959 gen_rtx_ASHIFT (mode, sub,
4960 shift))));
4961 }
4962 return 2;
4963 }
4964
4965 /* This will never be reached for Thumb2 because orn is a valid
4966 instruction. This is for Thumb1 and the ARM 32 bit cases.
4967
4968 x = y | constant (such that ~constant is a valid constant)
4969 Transform this to
4970 x = ~(~y & ~constant).
4971 */
4972 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4973 {
4974 if (generate)
4975 {
4976 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4977 emit_constant_insn (cond,
4978 gen_rtx_SET (sub,
4979 gen_rtx_NOT (mode, source)));
4980 source = sub;
4981 if (subtargets)
4982 sub = gen_reg_rtx (mode);
4983 emit_constant_insn (cond,
4984 gen_rtx_SET (sub,
4985 gen_rtx_AND (mode, source,
4986 GEN_INT (temp1))));
4987 emit_constant_insn (cond,
4988 gen_rtx_SET (target,
4989 gen_rtx_NOT (mode, sub)));
4990 }
4991 return 3;
4992 }
4993 break;
4994
4995 case AND:
4996 /* See if two shifts will do 2 or more insn's worth of work. */
4997 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4998 {
4999 HOST_WIDE_INT shift_mask = ((0xffffffff
5000 << (32 - clear_sign_bit_copies))
5001 & 0xffffffff);
5002
5003 if ((remainder | shift_mask) != 0xffffffff)
5004 {
5005 HOST_WIDE_INT new_val
5006 = ARM_SIGN_EXTEND (remainder | shift_mask);
5007
5008 if (generate)
5009 {
5010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5011 insns = arm_gen_constant (AND, SImode, cond, new_val,
5012 new_src, source, subtargets, 1);
5013 source = new_src;
5014 }
5015 else
5016 {
5017 rtx targ = subtargets ? NULL_RTX : target;
5018 insns = arm_gen_constant (AND, mode, cond, new_val,
5019 targ, source, subtargets, 0);
5020 }
5021 }
5022
5023 if (generate)
5024 {
5025 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5026 rtx shift = GEN_INT (clear_sign_bit_copies);
5027
5028 emit_insn (gen_ashlsi3 (new_src, source, shift));
5029 emit_insn (gen_lshrsi3 (target, new_src, shift));
5030 }
5031
5032 return insns + 2;
5033 }
5034
5035 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5036 {
5037 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5038
5039 if ((remainder | shift_mask) != 0xffffffff)
5040 {
5041 HOST_WIDE_INT new_val
5042 = ARM_SIGN_EXTEND (remainder | shift_mask);
5043 if (generate)
5044 {
5045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5046
5047 insns = arm_gen_constant (AND, mode, cond, new_val,
5048 new_src, source, subtargets, 1);
5049 source = new_src;
5050 }
5051 else
5052 {
5053 rtx targ = subtargets ? NULL_RTX : target;
5054
5055 insns = arm_gen_constant (AND, mode, cond, new_val,
5056 targ, source, subtargets, 0);
5057 }
5058 }
5059
5060 if (generate)
5061 {
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5063 rtx shift = GEN_INT (clear_zero_bit_copies);
5064
5065 emit_insn (gen_lshrsi3 (new_src, source, shift));
5066 emit_insn (gen_ashlsi3 (target, new_src, shift));
5067 }
5068
5069 return insns + 2;
5070 }
5071
5072 break;
5073
5074 default:
5075 break;
5076 }
5077
5078 /* Calculate what the instruction sequences would be if we generated it
5079 normally, negated, or inverted. */
5080 if (code == AND)
5081 /* AND cannot be split into multiple insns, so invert and use BIC. */
5082 insns = 99;
5083 else
5084 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5085
5086 if (can_negate)
5087 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5088 &neg_immediates);
5089 else
5090 neg_insns = 99;
5091
5092 if (can_invert || final_invert)
5093 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5094 &inv_immediates);
5095 else
5096 inv_insns = 99;
5097
5098 immediates = &pos_immediates;
5099
5100 /* Is the negated immediate sequence more efficient? */
5101 if (neg_insns < insns && neg_insns <= inv_insns)
5102 {
5103 insns = neg_insns;
5104 immediates = &neg_immediates;
5105 }
5106 else
5107 can_negate = 0;
5108
5109 /* Is the inverted immediate sequence more efficient?
5110 We must allow for an extra NOT instruction for XOR operations, although
5111 there is some chance that the final 'mvn' will get optimized later. */
5112 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5113 {
5114 insns = inv_insns;
5115 immediates = &inv_immediates;
5116 }
5117 else
5118 {
5119 can_invert = 0;
5120 final_invert = 0;
5121 }
5122
5123 /* Now output the chosen sequence as instructions. */
5124 if (generate)
5125 {
5126 for (i = 0; i < insns; i++)
5127 {
5128 rtx new_src, temp1_rtx;
5129
5130 temp1 = immediates->i[i];
5131
5132 if (code == SET || code == MINUS)
5133 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5134 else if ((final_invert || i < (insns - 1)) && subtargets)
5135 new_src = gen_reg_rtx (mode);
5136 else
5137 new_src = target;
5138
5139 if (can_invert)
5140 temp1 = ~temp1;
5141 else if (can_negate)
5142 temp1 = -temp1;
5143
5144 temp1 = trunc_int_for_mode (temp1, mode);
5145 temp1_rtx = GEN_INT (temp1);
5146
5147 if (code == SET)
5148 ;
5149 else if (code == MINUS)
5150 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5151 else
5152 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5153
5154 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5155 source = new_src;
5156
5157 if (code == SET)
5158 {
5159 can_negate = can_invert;
5160 can_invert = 0;
5161 code = PLUS;
5162 }
5163 else if (code == MINUS)
5164 code = PLUS;
5165 }
5166 }
5167
5168 if (final_invert)
5169 {
5170 if (generate)
5171 emit_constant_insn (cond, gen_rtx_SET (target,
5172 gen_rtx_NOT (mode, source)));
5173 insns++;
5174 }
5175
5176 return insns;
5177 }
5178
5179 /* Canonicalize a comparison so that we are more likely to recognize it.
5180 This can be done for a few constant compares, where we can make the
5181 immediate value easier to load. */
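/* For example, (x > 0x3ff) cannot encode 0x3ff as an ARM immediate, but it
can be rewritten as (x >= 0x400), and 0x400 is encodable; the GT/LE and
GTU/LEU cases below make exactly this kind of adjustment. */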
5182
5183 static void
5184 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5185 bool op0_preserve_value)
5186 {
5187 machine_mode mode;
5188 unsigned HOST_WIDE_INT i, maxval;
5189
5190 mode = GET_MODE (*op0);
5191 if (mode == VOIDmode)
5192 mode = GET_MODE (*op1);
5193
5194 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5195
5196 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5197 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5198 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5199 for GTU/LEU in Thumb mode. */
5200 if (mode == DImode)
5201 {
5202
5203 if (*code == GT || *code == LE
5204 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5205 {
5206 /* Missing comparison. First try to use an available
5207 comparison. */
5208 if (CONST_INT_P (*op1))
5209 {
5210 i = INTVAL (*op1);
5211 switch (*code)
5212 {
5213 case GT:
5214 case LE:
5215 if (i != maxval
5216 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5217 {
5218 *op1 = GEN_INT (i + 1);
5219 *code = *code == GT ? GE : LT;
5220 return;
5221 }
5222 break;
5223 case GTU:
5224 case LEU:
5225 if (i != ~((unsigned HOST_WIDE_INT) 0)
5226 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5227 {
5228 *op1 = GEN_INT (i + 1);
5229 *code = *code == GTU ? GEU : LTU;
5230 return;
5231 }
5232 break;
5233 default:
5234 gcc_unreachable ();
5235 }
5236 }
5237
5238 /* If that did not work, reverse the condition. */
5239 if (!op0_preserve_value)
5240 {
5241 std::swap (*op0, *op1);
5242 *code = (int)swap_condition ((enum rtx_code)*code);
5243 }
5244 }
5245 return;
5246 }
5247
5248 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5249 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5250 to facilitate possible combining with a cmp into 'ands'. */
5251 if (mode == SImode
5252 && GET_CODE (*op0) == ZERO_EXTEND
5253 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5254 && GET_MODE (XEXP (*op0, 0)) == QImode
5255 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5256 && subreg_lowpart_p (XEXP (*op0, 0))
5257 && *op1 == const0_rtx)
5258 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5259 GEN_INT (255));
5260
5261 /* Comparisons smaller than DImode. Only adjust comparisons against
5262 an out-of-range constant. */
5263 if (!CONST_INT_P (*op1)
5264 || const_ok_for_arm (INTVAL (*op1))
5265 || const_ok_for_arm (- INTVAL (*op1)))
5266 return;
5267
5268 i = INTVAL (*op1);
5269
5270 switch (*code)
5271 {
5272 case EQ:
5273 case NE:
5274 return;
5275
5276 case GT:
5277 case LE:
5278 if (i != maxval
5279 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5280 {
5281 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5282 *code = *code == GT ? GE : LT;
5283 return;
5284 }
5285 break;
5286
5287 case GE:
5288 case LT:
5289 if (i != ~maxval
5290 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5291 {
5292 *op1 = GEN_INT (i - 1);
5293 *code = *code == GE ? GT : LE;
5294 return;
5295 }
5296 break;
5297
5298 case GTU:
5299 case LEU:
5300 if (i != ~((unsigned HOST_WIDE_INT) 0)
5301 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5302 {
5303 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5304 *code = *code == GTU ? GEU : LTU;
5305 return;
5306 }
5307 break;
5308
5309 case GEU:
5310 case LTU:
5311 if (i != 0
5312 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5313 {
5314 *op1 = GEN_INT (i - 1);
5315 *code = *code == GEU ? GTU : LEU;
5316 return;
5317 }
5318 break;
5319
5320 default:
5321 gcc_unreachable ();
5322 }
5323 }
5324
5325
5326 /* Define how to find the value returned by a function. */
5327
5328 static rtx
5329 arm_function_value(const_tree type, const_tree func,
5330 bool outgoing ATTRIBUTE_UNUSED)
5331 {
5332 machine_mode mode;
5333 int unsignedp ATTRIBUTE_UNUSED;
5334 rtx r ATTRIBUTE_UNUSED;
5335
5336 mode = TYPE_MODE (type);
5337
5338 if (TARGET_AAPCS_BASED)
5339 return aapcs_allocate_return_reg (mode, type, func);
5340
5341 /* Promote integer types. */
5342 if (INTEGRAL_TYPE_P (type))
5343 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5344
5345 /* Promotes small structs returned in a register to full-word size
5346 for big-endian AAPCS. */
5347 if (arm_return_in_msb (type))
5348 {
5349 HOST_WIDE_INT size = int_size_in_bytes (type);
5350 if (size % UNITS_PER_WORD != 0)
5351 {
5352 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5353 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5354 }
5355 }
5356
5357 return arm_libcall_value_1 (mode);
5358 }
5359
5360 /* libcall hashtable helpers. */
5361
5362 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5363 {
5364 static inline hashval_t hash (const rtx_def *);
5365 static inline bool equal (const rtx_def *, const rtx_def *);
5366 static inline void remove (rtx_def *);
5367 };
5368
5369 inline bool
5370 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5371 {
5372 return rtx_equal_p (p1, p2);
5373 }
5374
5375 inline hashval_t
5376 libcall_hasher::hash (const rtx_def *p1)
5377 {
5378 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5379 }
5380
5381 typedef hash_table<libcall_hasher> libcall_table_type;
5382
5383 static void
5384 add_libcall (libcall_table_type *htab, rtx libcall)
5385 {
5386 *htab->find_slot (libcall, INSERT) = libcall;
5387 }
5388
5389 static bool
5390 arm_libcall_uses_aapcs_base (const_rtx libcall)
5391 {
5392 static bool init_done = false;
5393 static libcall_table_type *libcall_htab = NULL;
5394
5395 if (!init_done)
5396 {
5397 init_done = true;
5398
5399 libcall_htab = new libcall_table_type (31);
5400 add_libcall (libcall_htab,
5401 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5402 add_libcall (libcall_htab,
5403 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5404 add_libcall (libcall_htab,
5405 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5406 add_libcall (libcall_htab,
5407 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5408
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5411 add_libcall (libcall_htab,
5412 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5413 add_libcall (libcall_htab,
5414 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5415 add_libcall (libcall_htab,
5416 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5417
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5422 add_libcall (libcall_htab,
5423 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5424 add_libcall (libcall_htab,
5425 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5434
5435 /* Values from double-precision helper functions are returned in core
5436 registers if the selected core only supports single-precision
5437 arithmetic, even if we are using the hard-float ABI. The same is
5438 true for single-precision helpers, but we will never be using the
5439 hard-float ABI on a CPU which doesn't support single-precision
5440 operations in hardware. */
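/* For example, on such a core the DFmode add below becomes a library
call (__aeabi_dadd on EABI targets) whose result is returned in
r0/r1 rather than in a VFP register pair. */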
5441 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5442 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5443 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5444 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5452 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5453 SFmode));
5454 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5455 DFmode));
5456 add_libcall (libcall_htab,
5457 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5458 }
5459
5460 return libcall && libcall_htab->find (libcall) != NULL;
5461 }
5462
5463 static rtx
5464 arm_libcall_value_1 (machine_mode mode)
5465 {
5466 if (TARGET_AAPCS_BASED)
5467 return aapcs_libcall_value (mode);
5468 else if (TARGET_IWMMXT_ABI
5469 && arm_vector_mode_supported_p (mode))
5470 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5471 else
5472 return gen_rtx_REG (mode, ARG_REGISTER (1));
5473 }
5474
5475 /* Define how to find the value returned by a library function
5476 assuming the value has mode MODE. */
5477
5478 static rtx
5479 arm_libcall_value (machine_mode mode, const_rtx libcall)
5480 {
5481 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5482 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5483 {
5484 /* The following libcalls return their result in integer registers,
5485 even though they return a floating point value. */
5486 if (arm_libcall_uses_aapcs_base (libcall))
5487 return gen_rtx_REG (mode, ARG_REGISTER(1));
5488
5489 }
5490
5491 return arm_libcall_value_1 (mode);
5492 }
5493
5494 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5495
5496 static bool
5497 arm_function_value_regno_p (const unsigned int regno)
5498 {
5499 if (regno == ARG_REGISTER (1)
5500 || (TARGET_32BIT
5501 && TARGET_AAPCS_BASED
5502 && TARGET_HARD_FLOAT
5503 && regno == FIRST_VFP_REGNUM)
5504 || (TARGET_IWMMXT_ABI
5505 && regno == FIRST_IWMMXT_REGNUM))
5506 return true;
5507
5508 return false;
5509 }
5510
5511 /* Determine the amount of memory needed to store the possible return
5512 registers of an untyped call. */
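/* The base 16 bytes covers r0-r3; the additions below presumably correspond
to d0-d3 (32 bytes) under a hard-float ABI and to one 64-bit iWMMXt
register (8 bytes). */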
5513 int
5514 arm_apply_result_size (void)
5515 {
5516 int size = 16;
5517
5518 if (TARGET_32BIT)
5519 {
5520 if (TARGET_HARD_FLOAT_ABI)
5521 size += 32;
5522 if (TARGET_IWMMXT_ABI)
5523 size += 8;
5524 }
5525
5526 return size;
5527 }
5528
5529 /* Decide whether TYPE should be returned in memory (true)
5530 or in a register (false). FNTYPE is the type of the function making
5531 the call. */
5532 static bool
5533 arm_return_in_memory (const_tree type, const_tree fntype)
5534 {
5535 HOST_WIDE_INT size;
5536
5537 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5538
5539 if (TARGET_AAPCS_BASED)
5540 {
5541 /* Simple, non-aggregate types (i.e. not including vectors and
5542 complex) are always returned in a register (or registers).
5543 We don't care about which register here, so we can short-cut
5544 some of the detail. */
5545 if (!AGGREGATE_TYPE_P (type)
5546 && TREE_CODE (type) != VECTOR_TYPE
5547 && TREE_CODE (type) != COMPLEX_TYPE)
5548 return false;
5549
5550 /* Any return value that is no larger than one word can be
5551 returned in r0. */
5552 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5553 return false;
5554
5555 /* Check any available co-processors to see if they accept the
5556 type as a register candidate (VFP, for example, can return
5557 some aggregates in consecutive registers). These aren't
5558 available if the call is variadic. */
5559 if (aapcs_select_return_coproc (type, fntype) >= 0)
5560 return false;
5561
5562 /* Vector values should be returned using ARM registers, not
5563 memory (unless they're over 16 bytes, which will break since
5564 we only have four call-clobbered registers to play with). */
5565 if (TREE_CODE (type) == VECTOR_TYPE)
5566 return (size < 0 || size > (4 * UNITS_PER_WORD));
5567
5568 /* The rest go in memory. */
5569 return true;
5570 }
5571
5572 if (TREE_CODE (type) == VECTOR_TYPE)
5573 return (size < 0 || size > (4 * UNITS_PER_WORD));
5574
5575 if (!AGGREGATE_TYPE_P (type) &&
5576 (TREE_CODE (type) != VECTOR_TYPE))
5577 /* All simple types are returned in registers. */
5578 return false;
5579
5580 if (arm_abi != ARM_ABI_APCS)
5581 {
5582 /* ATPCS and later return aggregate types in memory only if they are
5583 larger than a word (or are variable size). */
5584 return (size < 0 || size > UNITS_PER_WORD);
5585 }
5586
5587 /* For the arm-wince targets we choose to be compatible with Microsoft's
5588 ARM and Thumb compilers, which always return aggregates in memory. */
5589 #ifndef ARM_WINCE
5590 /* All structures/unions bigger than one word are returned in memory.
5591 Also catch the case where int_size_in_bytes returns -1. In this case
5592 the aggregate is either huge or of variable size, and in either case
5593 we will want to return it via memory and not in a register. */
5594 if (size < 0 || size > UNITS_PER_WORD)
5595 return true;
5596
5597 if (TREE_CODE (type) == RECORD_TYPE)
5598 {
5599 tree field;
5600
5601 /* For a struct the APCS says that we only return in a register
5602 if the type is 'integer like' and every addressable element
5603 has an offset of zero. For practical purposes this means
5604 that the structure can have at most one non bit-field element
5605 and that this element must be the first one in the structure. */
5606
5607 /* Find the first field, ignoring non FIELD_DECL things which will
5608 have been created by C++. */
5609 for (field = TYPE_FIELDS (type);
5610 field && TREE_CODE (field) != FIELD_DECL;
5611 field = DECL_CHAIN (field))
5612 continue;
5613
5614 if (field == NULL)
5615 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5616
5617 /* Check that the first field is valid for returning in a register. */
5618
5619 /* ... Floats are not allowed */
5620 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5621 return true;
5622
5623 /* ... Aggregates that are not themselves valid for returning in
5624 a register are not allowed. */
5625 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5626 return true;
5627
5628 /* Now check the remaining fields, if any. Only bitfields are allowed,
5629 since they are not addressable. */
5630 for (field = DECL_CHAIN (field);
5631 field;
5632 field = DECL_CHAIN (field))
5633 {
5634 if (TREE_CODE (field) != FIELD_DECL)
5635 continue;
5636
5637 if (!DECL_BIT_FIELD_TYPE (field))
5638 return true;
5639 }
5640
5641 return false;
5642 }
5643
5644 if (TREE_CODE (type) == UNION_TYPE)
5645 {
5646 tree field;
5647
5648 /* Unions can be returned in registers if every element is
5649 integral, or can be returned in an integer register. */
5650 for (field = TYPE_FIELDS (type);
5651 field;
5652 field = DECL_CHAIN (field))
5653 {
5654 if (TREE_CODE (field) != FIELD_DECL)
5655 continue;
5656
5657 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5658 return true;
5659
5660 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5661 return true;
5662 }
5663
5664 return false;
5665 }
5666 #endif /* not ARM_WINCE */
5667
5668 /* Return all other types in memory. */
5669 return true;
5670 }
5671
5672 const struct pcs_attribute_arg
5673 {
5674 const char *arg;
5675 enum arm_pcs value;
5676 } pcs_attribute_args[] =
5677 {
5678 {"aapcs", ARM_PCS_AAPCS},
5679 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5680 #if 0
5681 /* We could recognize these, but changes would be needed elsewhere
5682 * to implement them. */
5683 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5684 {"atpcs", ARM_PCS_ATPCS},
5685 {"apcs", ARM_PCS_APCS},
5686 #endif
5687 {NULL, ARM_PCS_UNKNOWN}
5688 };
5689
5690 static enum arm_pcs
5691 arm_pcs_from_attribute (tree attr)
5692 {
5693 const struct pcs_attribute_arg *ptr;
5694 const char *arg;
5695
5696 /* Get the value of the argument. */
5697 if (TREE_VALUE (attr) == NULL_TREE
5698 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5699 return ARM_PCS_UNKNOWN;
5700
5701 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5702
5703 /* Check it against the list of known arguments. */
5704 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5705 if (streq (arg, ptr->arg))
5706 return ptr->value;
5707
5708 /* An unrecognized PCS variant name. */
5709 return ARM_PCS_UNKNOWN;
5710 }
5711
5712 /* Get the PCS variant to use for this call. TYPE is the function's type
5713 specification, DECL is the specific declaration. DECL may be null if
5714 the call could be indirect or if this is a library call. */
5715 static enum arm_pcs
5716 arm_get_pcs_model (const_tree type, const_tree decl)
5717 {
5718 bool user_convention = false;
5719 enum arm_pcs user_pcs = arm_pcs_default;
5720 tree attr;
5721
5722 gcc_assert (type);
5723
5724 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5725 if (attr)
5726 {
5727 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5728 user_convention = true;
5729 }
5730
5731 if (TARGET_AAPCS_BASED)
5732 {
5733 /* Detect varargs functions. These always use the base rules
5734 (no argument is ever a candidate for a co-processor
5735 register). */
5736 bool base_rules = stdarg_p (type);
5737
5738 if (user_convention)
5739 {
5740 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5741 sorry ("non-AAPCS derived PCS variant");
5742 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5743 error ("variadic functions must use the base AAPCS variant");
5744 }
5745
5746 if (base_rules)
5747 return ARM_PCS_AAPCS;
5748 else if (user_convention)
5749 return user_pcs;
5750 else if (decl && flag_unit_at_a_time)
5751 {
5752 /* Local functions never leak outside this compilation unit,
5753 so we are free to use whatever conventions are
5754 appropriate. */
5755 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5756 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5757 if (i && i->local)
5758 return ARM_PCS_AAPCS_LOCAL;
5759 }
5760 }
5761 else if (user_convention && user_pcs != arm_pcs_default)
5762 sorry ("PCS variant");
5763
5764 /* For everything else we use the target's default. */
5765 return arm_pcs_default;
5766 }
5767
5768
5769 static void
5770 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5771 const_tree fntype ATTRIBUTE_UNUSED,
5772 rtx libcall ATTRIBUTE_UNUSED,
5773 const_tree fndecl ATTRIBUTE_UNUSED)
5774 {
5775 /* Record the unallocated VFP registers. */
5776 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5777 pcum->aapcs_vfp_reg_alloc = 0;
5778 }
5779
5780 /* Walk down the type tree of TYPE counting consecutive base elements.
5781 If *MODEP is VOIDmode, then set it to the first valid floating point
5782 type. If a non-floating point type is found, or if a floating point
5783 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5784 otherwise return the count in the sub-tree. */
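/* As an illustration, struct { float x, y, z, w; } yields a count of 4 with
*MODEP set to SFmode (a candidate homogeneous aggregate), whereas
struct { double d; float f; } yields -1 because the base types differ. */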
5785 static int
5786 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5787 {
5788 machine_mode mode;
5789 HOST_WIDE_INT size;
5790
5791 switch (TREE_CODE (type))
5792 {
5793 case REAL_TYPE:
5794 mode = TYPE_MODE (type);
5795 if (mode != DFmode && mode != SFmode && mode != HFmode)
5796 return -1;
5797
5798 if (*modep == VOIDmode)
5799 *modep = mode;
5800
5801 if (*modep == mode)
5802 return 1;
5803
5804 break;
5805
5806 case COMPLEX_TYPE:
5807 mode = TYPE_MODE (TREE_TYPE (type));
5808 if (mode != DFmode && mode != SFmode)
5809 return -1;
5810
5811 if (*modep == VOIDmode)
5812 *modep = mode;
5813
5814 if (*modep == mode)
5815 return 2;
5816
5817 break;
5818
5819 case VECTOR_TYPE:
5820 /* Use V2SImode and V4SImode as representatives of all 64-bit
5821 and 128-bit vector types, whether or not those modes are
5822 supported with the present options. */
5823 size = int_size_in_bytes (type);
5824 switch (size)
5825 {
5826 case 8:
5827 mode = V2SImode;
5828 break;
5829 case 16:
5830 mode = V4SImode;
5831 break;
5832 default:
5833 return -1;
5834 }
5835
5836 if (*modep == VOIDmode)
5837 *modep = mode;
5838
5839 /* Vector modes are considered to be opaque: two vectors are
5840 equivalent for the purposes of being homogeneous aggregates
5841 if they are the same size. */
5842 if (*modep == mode)
5843 return 1;
5844
5845 break;
5846
5847 case ARRAY_TYPE:
5848 {
5849 int count;
5850 tree index = TYPE_DOMAIN (type);
5851
5852 /* Can't handle incomplete types nor sizes that are not
5853 fixed. */
5854 if (!COMPLETE_TYPE_P (type)
5855 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5856 return -1;
5857
5858 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5859 if (count == -1
5860 || !index
5861 || !TYPE_MAX_VALUE (index)
5862 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5863 || !TYPE_MIN_VALUE (index)
5864 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5865 || count < 0)
5866 return -1;
5867
5868 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5869 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5870
5871 /* There must be no padding. */
5872 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5873 return -1;
5874
5875 return count;
5876 }
5877
5878 case RECORD_TYPE:
5879 {
5880 int count = 0;
5881 int sub_count;
5882 tree field;
5883
5884 /* Can't handle incomplete types nor sizes that are not
5885 fixed. */
5886 if (!COMPLETE_TYPE_P (type)
5887 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5888 return -1;
5889
5890 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5891 {
5892 if (TREE_CODE (field) != FIELD_DECL)
5893 continue;
5894
5895 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5896 if (sub_count < 0)
5897 return -1;
5898 count += sub_count;
5899 }
5900
5901 /* There must be no padding. */
5902 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5903 return -1;
5904
5905 return count;
5906 }
5907
5908 case UNION_TYPE:
5909 case QUAL_UNION_TYPE:
5910 {
5911 /* These aren't very interesting except in a degenerate case. */
5912 int count = 0;
5913 int sub_count;
5914 tree field;
5915
5916 /* Can't handle incomplete types nor sizes that are not
5917 fixed. */
5918 if (!COMPLETE_TYPE_P (type)
5919 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5920 return -1;
5921
5922 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5923 {
5924 if (TREE_CODE (field) != FIELD_DECL)
5925 continue;
5926
5927 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5928 if (sub_count < 0)
5929 return -1;
5930 count = count > sub_count ? count : sub_count;
5931 }
5932
5933 /* There must be no padding. */
5934 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5935 return -1;
5936
5937 return count;
5938 }
5939
5940 default:
5941 break;
5942 }
5943
5944 return -1;
5945 }
5946
5947 /* Return true if PCS_VARIANT should use VFP registers. */
5948 static bool
5949 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5950 {
5951 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5952 {
5953 static bool seen_thumb1_vfp = false;
5954
5955 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5956 {
5957 sorry ("Thumb-1 hard-float VFP ABI");
5958 /* sorry() is not immediately fatal, so only display this once. */
5959 seen_thumb1_vfp = true;
5960 }
5961
5962 return true;
5963 }
5964
5965 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5966 return false;
5967
5968 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5969 (TARGET_VFP_DOUBLE || !is_double));
5970 }
5971
5972 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5973 suitable for passing or returning in VFP registers for the PCS
5974 variant selected. If it is, then *BASE_MODE is updated to contain
5975 a machine mode describing each element of the argument's type and
5976 *COUNT to hold the number of such elements. */
5977 static bool
5978 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5979 machine_mode mode, const_tree type,
5980 machine_mode *base_mode, int *count)
5981 {
5982 machine_mode new_mode = VOIDmode;
5983
5984 /* If we have the type information, prefer that to working things
5985 out from the mode. */
5986 if (type)
5987 {
5988 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5989
5990 if (ag_count > 0 && ag_count <= 4)
5991 *count = ag_count;
5992 else
5993 return false;
5994 }
5995 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5996 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5997 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5998 {
5999 *count = 1;
6000 new_mode = mode;
6001 }
6002 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6003 {
6004 *count = 2;
6005 new_mode = (mode == DCmode ? DFmode : SFmode);
6006 }
6007 else
6008 return false;
6009
6010
6011 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6012 return false;
6013
6014 *base_mode = new_mode;
6015 return true;
6016 }
6017
6018 static bool
6019 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6020 machine_mode mode, const_tree type)
6021 {
6022 int count ATTRIBUTE_UNUSED;
6023 machine_mode ag_mode ATTRIBUTE_UNUSED;
6024
6025 if (!use_vfp_abi (pcs_variant, false))
6026 return false;
6027 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6028 &ag_mode, &count);
6029 }
6030
6031 static bool
6032 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6033 const_tree type)
6034 {
6035 if (!use_vfp_abi (pcum->pcs_variant, false))
6036 return false;
6037
6038 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6039 &pcum->aapcs_vfp_rmode,
6040 &pcum->aapcs_vfp_rcount);
6041 }
6042
6043 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6044 for the behaviour of this function. */
6045
6046 static bool
6047 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6048 const_tree type ATTRIBUTE_UNUSED)
6049 {
6050 int rmode_size
6051 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6052 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6053 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6054 int regno;
6055
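/* Illustration: for two DFmode values RMODE_SIZE is 8, so SHIFT is 2 and
MASK is 0xf; the loop below then searches for four consecutive free
single-precision registers starting on an even (D-register) boundary. */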
6056 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6057 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6058 {
6059 pcum->aapcs_vfp_reg_alloc = mask << regno;
6060 if (mode == BLKmode
6061 || (mode == TImode && ! TARGET_NEON)
6062 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6063 {
6064 int i;
6065 int rcount = pcum->aapcs_vfp_rcount;
6066 int rshift = shift;
6067 machine_mode rmode = pcum->aapcs_vfp_rmode;
6068 rtx par;
6069 if (!TARGET_NEON)
6070 {
6071 /* Avoid using unsupported vector modes. */
6072 if (rmode == V2SImode)
6073 rmode = DImode;
6074 else if (rmode == V4SImode)
6075 {
6076 rmode = DImode;
6077 rcount *= 2;
6078 rshift /= 2;
6079 }
6080 }
6081 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6082 for (i = 0; i < rcount; i++)
6083 {
6084 rtx tmp = gen_rtx_REG (rmode,
6085 FIRST_VFP_REGNUM + regno + i * rshift);
6086 tmp = gen_rtx_EXPR_LIST
6087 (VOIDmode, tmp,
6088 GEN_INT (i * GET_MODE_SIZE (rmode)));
6089 XVECEXP (par, 0, i) = tmp;
6090 }
6091
6092 pcum->aapcs_reg = par;
6093 }
6094 else
6095 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6096 return true;
6097 }
6098 return false;
6099 }
6100
6101 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6102 comment there for the behaviour of this function. */
6103
6104 static rtx
6105 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6106 machine_mode mode,
6107 const_tree type ATTRIBUTE_UNUSED)
6108 {
6109 if (!use_vfp_abi (pcs_variant, false))
6110 return NULL;
6111
6112 if (mode == BLKmode
6113 || (GET_MODE_CLASS (mode) == MODE_INT
6114 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6115 && !TARGET_NEON))
6116 {
6117 int count;
6118 machine_mode ag_mode;
6119 int i;
6120 rtx par;
6121 int shift;
6122
6123 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6124 &ag_mode, &count);
6125
6126 if (!TARGET_NEON)
6127 {
6128 if (ag_mode == V2SImode)
6129 ag_mode = DImode;
6130 else if (ag_mode == V4SImode)
6131 {
6132 ag_mode = DImode;
6133 count *= 2;
6134 }
6135 }
6136 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6137 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6138 for (i = 0; i < count; i++)
6139 {
6140 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6141 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6142 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6143 XVECEXP (par, 0, i) = tmp;
6144 }
6145
6146 return par;
6147 }
6148
6149 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6150 }
6151
6152 static void
6153 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6154 machine_mode mode ATTRIBUTE_UNUSED,
6155 const_tree type ATTRIBUTE_UNUSED)
6156 {
6157 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6158 pcum->aapcs_vfp_reg_alloc = 0;
6159 return;
6160 }
6161
6162 #define AAPCS_CP(X) \
6163 { \
6164 aapcs_ ## X ## _cum_init, \
6165 aapcs_ ## X ## _is_call_candidate, \
6166 aapcs_ ## X ## _allocate, \
6167 aapcs_ ## X ## _is_return_candidate, \
6168 aapcs_ ## X ## _allocate_return_reg, \
6169 aapcs_ ## X ## _advance \
6170 }
6171
6172 /* Table of co-processors that can be used to pass arguments in
6173 registers. Ideally no argument should be a candidate for more than
6174 one co-processor table entry, but the table is processed in order
6175 and stops after the first match. If that entry then fails to put
6176 the argument into a co-processor register, the argument will go on
6177 the stack. */
6178 static struct
6179 {
6180 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6181 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6182
6183 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6184 BLKmode) is a candidate for this co-processor's registers; this
6185 function should ignore any position-dependent state in
6186 CUMULATIVE_ARGS and only use call-type dependent information. */
6187 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6188
6189 /* Return true if the argument does get a co-processor register; it
6190 should set aapcs_reg to an RTX of the register allocated as is
6191 required for a return from FUNCTION_ARG. */
6192 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6193
6194 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6195 be returned in this co-processor's registers. */
6196 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6197
6198 /* Allocate and return an RTX element to hold the return value of a call. This
6199 routine must not fail and will only be called if is_return_candidate
6200 returned true with the same parameters. */
6201 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6202
6203 /* Finish processing this argument and prepare to start processing
6204 the next one. */
6205 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6206 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6207 {
6208 AAPCS_CP(vfp)
6209 };
6210
6211 #undef AAPCS_CP
6212
6213 static int
6214 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6215 const_tree type)
6216 {
6217 int i;
6218
6219 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6220 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6221 return i;
6222
6223 return -1;
6224 }
6225
6226 static int
6227 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6228 {
6229 /* We aren't passed a decl, so we can't check that a call is local.
6230 However, it isn't clear that that would be a win anyway, since it
6231 might limit some tail-calling opportunities. */
6232 enum arm_pcs pcs_variant;
6233
6234 if (fntype)
6235 {
6236 const_tree fndecl = NULL_TREE;
6237
6238 if (TREE_CODE (fntype) == FUNCTION_DECL)
6239 {
6240 fndecl = fntype;
6241 fntype = TREE_TYPE (fntype);
6242 }
6243
6244 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6245 }
6246 else
6247 pcs_variant = arm_pcs_default;
6248
6249 if (pcs_variant != ARM_PCS_AAPCS)
6250 {
6251 int i;
6252
6253 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6254 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6255 TYPE_MODE (type),
6256 type))
6257 return i;
6258 }
6259 return -1;
6260 }
6261
6262 static rtx
6263 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6264 const_tree fntype)
6265 {
6266 /* We aren't passed a decl, so we can't check that a call is local.
6267 However, it isn't clear that that would be a win anyway, since it
6268 might limit some tail-calling opportunities. */
6269 enum arm_pcs pcs_variant;
6270 int unsignedp ATTRIBUTE_UNUSED;
6271
6272 if (fntype)
6273 {
6274 const_tree fndecl = NULL_TREE;
6275
6276 if (TREE_CODE (fntype) == FUNCTION_DECL)
6277 {
6278 fndecl = fntype;
6279 fntype = TREE_TYPE (fntype);
6280 }
6281
6282 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6283 }
6284 else
6285 pcs_variant = arm_pcs_default;
6286
6287 /* Promote integer types. */
6288 if (type && INTEGRAL_TYPE_P (type))
6289 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6290
6291 if (pcs_variant != ARM_PCS_AAPCS)
6292 {
6293 int i;
6294
6295 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6296 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6297 type))
6298 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6299 mode, type);
6300 }
6301
6302 /* Promotes small structs returned in a register to full-word size
6303 for big-endian AAPCS. */
6304 if (type && arm_return_in_msb (type))
6305 {
6306 HOST_WIDE_INT size = int_size_in_bytes (type);
6307 if (size % UNITS_PER_WORD != 0)
6308 {
6309 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6310 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6311 }
6312 }
6313
6314 return gen_rtx_REG (mode, R0_REGNUM);
6315 }
6316
6317 static rtx
6318 aapcs_libcall_value (machine_mode mode)
6319 {
6320 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6321 && GET_MODE_SIZE (mode) <= 4)
6322 mode = SImode;
6323
6324 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6325 }
6326
6327 /* Lay out a function argument using the AAPCS rules. The rule
6328 numbers referred to here are those in the AAPCS. */
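/* Illustration of rules C3-C5: for a call such as f (int a, long long b),
A is allocated to r0, C3 rounds the core register number up to r2 for the
doubleword-aligned B, which then occupies r2-r3; a further doubleword
argument would go on the stack under C6-C8. */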
6329 static void
6330 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6331 const_tree type, bool named)
6332 {
6333 int nregs, nregs2;
6334 int ncrn;
6335
6336 /* We only need to do this once per argument. */
6337 if (pcum->aapcs_arg_processed)
6338 return;
6339
6340 pcum->aapcs_arg_processed = true;
6341
6342 /* Special case: if named is false then we are handling an incoming
6343 anonymous argument which is on the stack. */
6344 if (!named)
6345 return;
6346
6347 /* Is this a potential co-processor register candidate? */
6348 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6349 {
6350 int slot = aapcs_select_call_coproc (pcum, mode, type);
6351 pcum->aapcs_cprc_slot = slot;
6352
6353 /* We don't have to apply any of the rules from part B of the
6354 preparation phase, these are handled elsewhere in the
6355 compiler. */
6356
6357 if (slot >= 0)
6358 {
6359 /* A Co-processor register candidate goes either in its own
6360 class of registers or on the stack. */
6361 if (!pcum->aapcs_cprc_failed[slot])
6362 {
6363 /* C1.cp - Try to allocate the argument to co-processor
6364 registers. */
6365 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6366 return;
6367
6368 /* C2.cp - Put the argument on the stack and note that we
6369 can't assign any more candidates in this slot. We also
6370 need to note that we have allocated stack space, so that
6371 we won't later try to split a non-cprc candidate between
6372 core registers and the stack. */
6373 pcum->aapcs_cprc_failed[slot] = true;
6374 pcum->can_split = false;
6375 }
6376
6377 /* We didn't get a register, so this argument goes on the
6378 stack. */
6379 gcc_assert (pcum->can_split == false);
6380 return;
6381 }
6382 }
6383
6384 /* C3 - For double-word aligned arguments, round the NCRN up to the
6385 next even number. */
6386 ncrn = pcum->aapcs_ncrn;
6387 if (ncrn & 1)
6388 {
6389 int res = arm_needs_doubleword_align (mode, type);
6390 /* Only warn during RTL expansion of call stmts, otherwise we would
6391 warn e.g. during gimplification even on functions that will be
6392 always inlined, and we'd warn multiple times. Don't warn when
6393 called in expand_function_start either, as we warn instead in
6394 arm_function_arg_boundary in that case. */
6395 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6396 inform (input_location, "parameter passing for argument of type "
6397 "%qT changed in GCC 7.1", type);
6398 else if (res > 0)
6399 ncrn++;
6400 }
6401
6402 nregs = ARM_NUM_REGS2(mode, type);
6403
6404 /* Sigh, this test should really assert that nregs > 0, but a GCC
6405 extension allows empty structs and then gives them zero size; it
6406 then allows such a structure to be passed by value. For some of
6407 the code below we have to pretend that such an argument has
6408 non-zero size so that we 'locate' it correctly either in
6409 registers or on the stack. */
6410 gcc_assert (nregs >= 0);
6411
6412 nregs2 = nregs ? nregs : 1;
6413
6414 /* C4 - Argument fits entirely in core registers. */
6415 if (ncrn + nregs2 <= NUM_ARG_REGS)
6416 {
6417 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6418 pcum->aapcs_next_ncrn = ncrn + nregs;
6419 return;
6420 }
6421
6422 /* C5 - Some core registers left and there are no arguments already
6423 on the stack: split this argument between the remaining core
6424 registers and the stack. */
6425 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6426 {
6427 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6428 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6429 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6430 return;
6431 }
6432
6433 /* C6 - NCRN is set to 4. */
6434 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6435
6436 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6437 return;
6438 }
6439
6440 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6441 for a call to a function whose data type is FNTYPE.
6442 For a library call, FNTYPE is NULL. */
6443 void
6444 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6445 rtx libname,
6446 tree fndecl ATTRIBUTE_UNUSED)
6447 {
6448 /* Long call handling. */
6449 if (fntype)
6450 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6451 else
6452 pcum->pcs_variant = arm_pcs_default;
6453
6454 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6455 {
6456 if (arm_libcall_uses_aapcs_base (libname))
6457 pcum->pcs_variant = ARM_PCS_AAPCS;
6458
6459 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6460 pcum->aapcs_reg = NULL_RTX;
6461 pcum->aapcs_partial = 0;
6462 pcum->aapcs_arg_processed = false;
6463 pcum->aapcs_cprc_slot = -1;
6464 pcum->can_split = true;
6465
6466 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6467 {
6468 int i;
6469
6470 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6471 {
6472 pcum->aapcs_cprc_failed[i] = false;
6473 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6474 }
6475 }
6476 return;
6477 }
6478
6479 /* Legacy ABIs. */
6480
6481 /* On the ARM, the offset starts at 0. */
6482 pcum->nregs = 0;
6483 pcum->iwmmxt_nregs = 0;
6484 pcum->can_split = true;
6485
6486 /* Varargs vectors are treated the same as long long.
6487 named_count avoids having to change the way arm handles 'named' */
6488 pcum->named_count = 0;
6489 pcum->nargs = 0;
6490
6491 if (TARGET_REALLY_IWMMXT && fntype)
6492 {
6493 tree fn_arg;
6494
6495 for (fn_arg = TYPE_ARG_TYPES (fntype);
6496 fn_arg;
6497 fn_arg = TREE_CHAIN (fn_arg))
6498 pcum->named_count += 1;
6499
6500 if (! pcum->named_count)
6501 pcum->named_count = INT_MAX;
6502 }
6503 }
6504
6505 /* Return 1 if double word alignment is required for argument passing.
6506 Return -1 if double word alignment used to be required for argument
6507 passing before PR77728 ABI fix, but is not required anymore.
6508 Return 0 if double word alignment is not required and wasn't required
6509 before either. */
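/* For example, a 64-bit aligned scalar such as long long yields 1, whereas a
record whose only over-aligned member is not a FIELD_DECL (e.g. a static
data member) yields -1 so that -Wpsabi can report the GCC 7.1 change
without actually applying the extra alignment. */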
6510 static int
6511 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6512 {
6513 if (!type)
6514 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6515
6516 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6517 if (!AGGREGATE_TYPE_P (type))
6518 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6519
6520 /* Array types: Use member alignment of element type. */
6521 if (TREE_CODE (type) == ARRAY_TYPE)
6522 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6523
6524 int ret = 0;
6525 /* Record/aggregate types: Use greatest member alignment of any member. */
6526 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6528 {
6529 if (TREE_CODE (field) == FIELD_DECL)
6530 return 1;
6531 else
6532 /* Before PR77728 fix, we were incorrectly considering also
6533 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6534 Make sure we can warn about that with -Wpsabi. */
6535 ret = -1;
6536 }
6537
6538 return ret;
6539 }
6540
6541
6542 /* Determine where to put an argument to a function.
6543 Value is zero to push the argument on the stack,
6544 or a hard register in which to store the argument.
6545
6546 MODE is the argument's machine mode.
6547 TYPE is the data type of the argument (as a tree).
6548 This is null for libcalls where that information may
6549 not be available.
6550 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6551 the preceding args and about the function being called.
6552 NAMED is nonzero if this argument is a named parameter
6553 (otherwise it is an extra parameter matching an ellipsis).
6554
6555 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6556 other arguments are passed on the stack. If (NAMED == 0) (which happens
6557 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6558 defined), say it is passed in the stack (function_prologue will
6559 indeed make it be passed on the stack if necessary). */
6560
6561 static rtx
6562 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6563 const_tree type, bool named)
6564 {
6565 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6566 int nregs;
6567
6568 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6569 a call insn (op3 of a call_value insn). */
6570 if (mode == VOIDmode)
6571 return const0_rtx;
6572
6573 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6574 {
6575 aapcs_layout_arg (pcum, mode, type, named);
6576 return pcum->aapcs_reg;
6577 }
6578
6579 /* Varargs vectors are treated the same as long long.
6580 named_count avoids having to change the way arm handles 'named' */
6581 if (TARGET_IWMMXT_ABI
6582 && arm_vector_mode_supported_p (mode)
6583 && pcum->named_count > pcum->nargs + 1)
6584 {
6585 if (pcum->iwmmxt_nregs <= 9)
6586 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6587 else
6588 {
6589 pcum->can_split = false;
6590 return NULL_RTX;
6591 }
6592 }
6593
6594 /* Put doubleword aligned quantities in even register pairs. */
6595 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6596 {
6597 int res = arm_needs_doubleword_align (mode, type);
6598 if (res < 0 && warn_psabi)
6599 inform (input_location, "parameter passing for argument of type "
6600 "%qT changed in GCC 7.1", type);
6601 else if (res > 0)
6602 pcum->nregs++;
6603 }
6604
6605 /* Only allow splitting an arg between regs and memory if all preceding
6606 args were allocated to regs. For args passed by reference we only count
6607 the reference pointer. */
6608 if (pcum->can_split)
6609 nregs = 1;
6610 else
6611 nregs = ARM_NUM_REGS2 (mode, type);
6612
6613 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6614 return NULL_RTX;
6615
6616 return gen_rtx_REG (mode, pcum->nregs);
6617 }
6618
6619 static unsigned int
6620 arm_function_arg_boundary (machine_mode mode, const_tree type)
6621 {
6622 if (!ARM_DOUBLEWORD_ALIGN)
6623 return PARM_BOUNDARY;
6624
6625 int res = arm_needs_doubleword_align (mode, type);
6626 if (res < 0 && warn_psabi)
6627 inform (input_location, "parameter passing for argument of type %qT "
6628 "changed in GCC 7.1", type);
6629
6630 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6631 }
6632
6633 static int
6634 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6635 tree type, bool named)
6636 {
6637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6638 int nregs = pcum->nregs;
6639
6640 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6641 {
6642 aapcs_layout_arg (pcum, mode, type, named);
6643 return pcum->aapcs_partial;
6644 }
6645
6646 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6647 return 0;
6648
6649 if (NUM_ARG_REGS > nregs
6650 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6651 && pcum->can_split)
6652 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6653
6654 return 0;
6655 }
6656
6657 /* Update the data in PCUM to advance over an argument
6658 of mode MODE and data type TYPE.
6659 (TYPE is null for libcalls where that information may not be available.) */
6660
6661 static void
6662 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6663 const_tree type, bool named)
6664 {
6665 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6666
6667 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6668 {
6669 aapcs_layout_arg (pcum, mode, type, named);
6670
6671 if (pcum->aapcs_cprc_slot >= 0)
6672 {
6673 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6674 type);
6675 pcum->aapcs_cprc_slot = -1;
6676 }
6677
6678 /* Generic stuff. */
6679 pcum->aapcs_arg_processed = false;
6680 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6681 pcum->aapcs_reg = NULL_RTX;
6682 pcum->aapcs_partial = 0;
6683 }
6684 else
6685 {
6686 pcum->nargs += 1;
6687 if (arm_vector_mode_supported_p (mode)
6688 && pcum->named_count > pcum->nargs
6689 && TARGET_IWMMXT_ABI)
6690 pcum->iwmmxt_nregs += 1;
6691 else
6692 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6693 }
6694 }
6695
6696 /* Variable sized types are passed by reference. This is a GCC
6697 extension to the ARM ABI. */
6698
6699 static bool
6700 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6701 machine_mode mode ATTRIBUTE_UNUSED,
6702 const_tree type, bool named ATTRIBUTE_UNUSED)
6703 {
6704 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6705 }
6706 \f
6707 /* Encode the current state of the #pragma [no_]long_calls. */
6708 typedef enum
6709 {
6710 OFF, /* No #pragma [no_]long_calls is in effect. */
6711 LONG, /* #pragma long_calls is in effect. */
6712 SHORT /* #pragma no_long_calls is in effect. */
6713 } arm_pragma_enum;
6714
6715 static arm_pragma_enum arm_pragma_long_calls = OFF;
6716
6717 void
6718 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6719 {
6720 arm_pragma_long_calls = LONG;
6721 }
6722
6723 void
6724 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6725 {
6726 arm_pragma_long_calls = SHORT;
6727 }
6728
6729 void
6730 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6731 {
6732 arm_pragma_long_calls = OFF;
6733 }
6734 \f
6735 /* Handle an attribute requiring a FUNCTION_DECL;
6736 arguments as in struct attribute_spec.handler. */
6737 static tree
6738 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6740 {
6741 if (TREE_CODE (*node) != FUNCTION_DECL)
6742 {
6743 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6744 name);
6745 *no_add_attrs = true;
6746 }
6747
6748 return NULL_TREE;
6749 }
6750
6751 /* Handle an "interrupt" or "isr" attribute;
6752 arguments as in struct attribute_spec.handler. */
6753 static tree
6754 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6755 bool *no_add_attrs)
6756 {
6757 if (DECL_P (*node))
6758 {
6759 if (TREE_CODE (*node) != FUNCTION_DECL)
6760 {
6761 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6762 name);
6763 *no_add_attrs = true;
6764 }
6765 /* FIXME: the argument if any is checked for type attributes;
6766 should it be checked for decl ones? */
6767 }
6768 else
6769 {
6770 if (TREE_CODE (*node) == FUNCTION_TYPE
6771 || TREE_CODE (*node) == METHOD_TYPE)
6772 {
6773 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6774 {
6775 warning (OPT_Wattributes, "%qE attribute ignored",
6776 name);
6777 *no_add_attrs = true;
6778 }
6779 }
6780 else if (TREE_CODE (*node) == POINTER_TYPE
6781 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6782 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6783 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6784 {
6785 *node = build_variant_type_copy (*node);
6786 TREE_TYPE (*node) = build_type_attribute_variant
6787 (TREE_TYPE (*node),
6788 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6789 *no_add_attrs = true;
6790 }
6791 else
6792 {
6793 /* Possibly pass this attribute on from the type to a decl. */
6794 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6795 | (int) ATTR_FLAG_FUNCTION_NEXT
6796 | (int) ATTR_FLAG_ARRAY_NEXT))
6797 {
6798 *no_add_attrs = true;
6799 return tree_cons (name, args, NULL_TREE);
6800 }
6801 else
6802 {
6803 warning (OPT_Wattributes, "%qE attribute ignored",
6804 name);
6805 }
6806 }
6807 }
6808
6809 return NULL_TREE;
6810 }
6811
6812 /* Handle a "pcs" attribute; arguments as in struct
6813 attribute_spec.handler. */
6814 static tree
6815 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6816 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6817 {
6818 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6819 {
6820 warning (OPT_Wattributes, "%qE attribute ignored", name);
6821 *no_add_attrs = true;
6822 }
6823 return NULL_TREE;
6824 }
6825
6826 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6827 /* Handle the "notshared" attribute. This attribute is another way of
6828 requesting hidden visibility. ARM's compiler supports
6829 "__declspec(notshared)"; we support the same thing via an
6830 attribute. */
6831
6832 static tree
6833 arm_handle_notshared_attribute (tree *node,
6834 tree name ATTRIBUTE_UNUSED,
6835 tree args ATTRIBUTE_UNUSED,
6836 int flags ATTRIBUTE_UNUSED,
6837 bool *no_add_attrs)
6838 {
6839 tree decl = TYPE_NAME (*node);
6840
6841 if (decl)
6842 {
6843 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6844 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6845 *no_add_attrs = false;
6846 }
6847 return NULL_TREE;
6848 }
6849 #endif
6850
6851 /* This function returns true if a function with declaration FNDECL and type
6852 FNTYPE uses the stack to pass arguments or to return its result, and false
6853 otherwise. This is used for functions with the attributes
6854 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6855 diagnostic messages if the stack is used. NAME is the name of the attribute
6856 used. */
6857
6858 static bool
6859 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6860 {
6861 function_args_iterator args_iter;
6862 CUMULATIVE_ARGS args_so_far_v;
6863 cumulative_args_t args_so_far;
6864 bool first_param = true;
6865 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6866
6867 /* Error out if any argument is passed on the stack. */
6868 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6869 args_so_far = pack_cumulative_args (&args_so_far_v);
6870 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6871 {
6872 rtx arg_rtx;
6873 machine_mode arg_mode = TYPE_MODE (arg_type);
6874
6875 prev_arg_type = arg_type;
6876 if (VOID_TYPE_P (arg_type))
6877 continue;
6878
6879 if (!first_param)
6880 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6881 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6882 if (!arg_rtx
6883 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6884 {
6885 error ("%qE attribute not available to functions with arguments "
6886 "passed on the stack", name);
6887 return true;
6888 }
6889 first_param = false;
6890 }
6891
6892 /* Error out for variadic functions since we cannot control how many
6893 arguments will be passed and thus stack could be used. stdarg_p () is not
6894 used for the checking to avoid browsing arguments twice. */
6895 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6896 {
6897 error ("%qE attribute not available to functions with variable number "
6898 "of arguments", name);
6899 return true;
6900 }
6901
6902 /* Error out if return value is passed on the stack. */
6903 ret_type = TREE_TYPE (fntype);
6904 if (arm_return_in_memory (ret_type, fntype))
6905 {
6906 error ("%qE attribute not available to functions that return value on "
6907 "the stack", name);
6908 return true;
6909 }
6910 return false;
6911 }
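/* For instance (illustrative, hypothetical declarations, assumes -mcmse and
   the AAPCS base standard):

     int __attribute__ ((cmse_nonsecure_entry)) f4 (int, int, int, int);
     int __attribute__ ((cmse_nonsecure_entry)) f5 (int, int, int, int, int);

   F4 passes the check above because all arguments and the return value fit
   in r0-r3, whereas F5 is rejected because its fifth integer argument would
   be passed on the stack.  */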
6912
6913 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6914 function will check whether the attribute is allowed here and will add the
6915 attribute to the function declaration tree or otherwise issue a warning. */
6916
6917 static tree
6918 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6919 tree /* args */,
6920 int /* flags */,
6921 bool *no_add_attrs)
6922 {
6923 tree fndecl;
6924
6925 if (!use_cmse)
6926 {
6927 *no_add_attrs = true;
6928 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6929 name);
6930 return NULL_TREE;
6931 }
6932
6933 /* Ignore the attribute on anything that is not a function declaration. */
6934 if (TREE_CODE (*node) != FUNCTION_DECL)
6935 {
6936 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6937 name);
6938 *no_add_attrs = true;
6939 return NULL_TREE;
6940 }
6941
6942 fndecl = *node;
6943
6944 /* Warn for static linkage functions. */
6945 if (!TREE_PUBLIC (fndecl))
6946 {
6947 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6948 "with static linkage", name);
6949 *no_add_attrs = true;
6950 return NULL_TREE;
6951 }
6952
6953 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6954 TREE_TYPE (fndecl));
6955 return NULL_TREE;
6956 }
6957
6958
6959 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6960 function will check whether the attribute is allowed here and will add the
6961 attribute to the function type tree or otherwise issue a diagnostic. The
6962 reason we check this at declaration time is to only allow the use of the
6963 attribute with declarations of function pointers and not function
6964 declarations. This function checks NODE is of the expected type and issues
6965 diagnostics otherwise using NAME. If it is not of the expected type
6966 *NO_ADD_ATTRS will be set to true. */
6967
6968 static tree
6969 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6970 tree /* args */,
6971 int /* flags */,
6972 bool *no_add_attrs)
6973 {
6974 tree decl = NULL_TREE, fntype = NULL_TREE;
6975 tree type;
6976
6977 if (!use_cmse)
6978 {
6979 *no_add_attrs = true;
6980 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6981 name);
6982 return NULL_TREE;
6983 }
6984
6985 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6986 {
6987 decl = *node;
6988 fntype = TREE_TYPE (decl);
6989 }
6990
6991 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6992 fntype = TREE_TYPE (fntype);
6993
6994 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6995 {
6996 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6997 "function pointer", name);
6998 *no_add_attrs = true;
6999 return NULL_TREE;
7000 }
7001
7002 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7003
7004 if (*no_add_attrs)
7005 return NULL_TREE;
7006
7007 /* Prevent trees being shared among function types with and without
7008 cmse_nonsecure_call attribute. */
7009 type = TREE_TYPE (decl);
7010
7011 type = build_distinct_type_copy (type);
7012 TREE_TYPE (decl) = type;
7013 fntype = type;
7014
7015 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7016 {
7017 type = fntype;
7018 fntype = TREE_TYPE (fntype);
7019 fntype = build_distinct_type_copy (fntype);
7020 TREE_TYPE (type) = fntype;
7021 }
7022
7023 /* Construct a type attribute and add it to the function type. */
7024 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7025 TYPE_ATTRIBUTES (fntype));
7026 TYPE_ATTRIBUTES (fntype) = attrs;
7027 return NULL_TREE;
7028 }
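/* Illustrative use (hypothetical names, requires -mcmse): the attribute must
   reach a function type through a pointer declaration, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc_t (void);
     nsfunc_t *ns_callback;

   whereas placing it directly on an ordinary function declaration is
   rejected by the check above with a warning.  */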
7029
7030 /* Return 0 if the attributes for two types are incompatible, 1 if they
7031 are compatible, and 2 if they are nearly compatible (which causes a
7032 warning to be generated). */
7033 static int
7034 arm_comp_type_attributes (const_tree type1, const_tree type2)
7035 {
7036 int l1, l2, s1, s2;
7037
7038 /* Check for mismatch of non-default calling convention. */
7039 if (TREE_CODE (type1) != FUNCTION_TYPE)
7040 return 1;
7041
7042 /* Check for mismatched call attributes. */
7043 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7044 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7045 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7046 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7047
7048 /* Only bother to check if an attribute is defined. */
7049 if (l1 | l2 | s1 | s2)
7050 {
7051 /* If one type has an attribute, the other must have the same attribute. */
7052 if ((l1 != l2) || (s1 != s2))
7053 return 0;
7054
7055 /* Disallow mixed attributes. */
7056 if ((l1 & s2) || (l2 & s1))
7057 return 0;
7058 }
7059
7060 /* Check for mismatched ISR attribute. */
7061 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7062 if (! l1)
7063 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7065 if (! l2)
7066 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7067 if (l1 != l2)
7068 return 0;
7069
7070 l1 = lookup_attribute ("cmse_nonsecure_call",
7071 TYPE_ATTRIBUTES (type1)) != NULL;
7072 l2 = lookup_attribute ("cmse_nonsecure_call",
7073 TYPE_ATTRIBUTES (type2)) != NULL;
7074
7075 if (l1 != l2)
7076 return 0;
7077
7078 return 1;
7079 }
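/* Illustrative consequence (hypothetical typedefs): given

     typedef void lcfn (void) __attribute__ ((long_call));
     typedef void scfn (void) __attribute__ ((short_call));

   an 'lcfn *' and an 'scfn *' are treated as incompatible pointer types,
   since the mismatched call attributes make the check above return 0.  */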
7080
7081 /* Assigns default attributes to newly defined type. This is used to
7082 set short_call/long_call attributes for function types of
7083 functions defined inside corresponding #pragma scopes. */
7084 static void
7085 arm_set_default_type_attributes (tree type)
7086 {
7087 /* Add __attribute__ ((long_call)) to all functions, when
7088 inside #pragma long_calls or __attribute__ ((short_call)),
7089 when inside #pragma no_long_calls. */
7090 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7091 {
7092 tree type_attr_list, attr_name;
7093 type_attr_list = TYPE_ATTRIBUTES (type);
7094
7095 if (arm_pragma_long_calls == LONG)
7096 attr_name = get_identifier ("long_call");
7097 else if (arm_pragma_long_calls == SHORT)
7098 attr_name = get_identifier ("short_call");
7099 else
7100 return;
7101
7102 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7103 TYPE_ATTRIBUTES (type) = type_attr_list;
7104 }
7105 }
7106 \f
7107 /* Return true if DECL is known to be linked into section SECTION. */
7108
7109 static bool
7110 arm_function_in_section_p (tree decl, section *section)
7111 {
7112 /* We can only be certain about the prevailing symbol definition. */
7113 if (!decl_binds_to_current_def_p (decl))
7114 return false;
7115
7116 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7117 if (!DECL_SECTION_NAME (decl))
7118 {
7119 /* Make sure that we will not create a unique section for DECL. */
7120 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7121 return false;
7122 }
7123
7124 return function_section (decl) == section;
7125 }
7126
7127 /* Return nonzero if a 32-bit "long_call" should be generated for
7128 a call from the current function to DECL. We generate a long_call
7129 if the function:
7130
7131 a. has an __attribute__ ((long_call))
7132 or b. is within the scope of a #pragma long_calls
7133 or c. the -mlong-calls command line switch has been specified
7134
7135 However we do not generate a long call if the function:
7136
7137 d. has an __attribute__ ((short_call))
7138 or e. is inside the scope of a #pragma no_long_calls
7139 or f. is defined in the same section as the current function. */
7140
7141 bool
7142 arm_is_long_call_p (tree decl)
7143 {
7144 tree attrs;
7145
7146 if (!decl)
7147 return TARGET_LONG_CALLS;
7148
7149 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7150 if (lookup_attribute ("short_call", attrs))
7151 return false;
7152
7153 /* For "f", be conservative, and only cater for cases in which the
7154 whole of the current function is placed in the same section. */
7155 if (!flag_reorder_blocks_and_partition
7156 && TREE_CODE (decl) == FUNCTION_DECL
7157 && arm_function_in_section_p (decl, current_function_section ()))
7158 return false;
7159
7160 if (lookup_attribute ("long_call", attrs))
7161 return true;
7162
7163 return TARGET_LONG_CALLS;
7164 }
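/* Worked example (illustrative, hypothetical declarations): when compiling
   with -mlong-calls,

     void f (void) __attribute__ ((short_call));
     void g (void);

   a call to F stays a normal BL (rule d above), a call to G in another
   section becomes a long call (rule c), and a call to G defined in the same
   section as the caller is still emitted as a normal call (rule f).  */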
7165
7166 /* Return nonzero if it is ok to make a tail-call to DECL. */
7167 static bool
7168 arm_function_ok_for_sibcall (tree decl, tree exp)
7169 {
7170 unsigned long func_type;
7171
7172 if (cfun->machine->sibcall_blocked)
7173 return false;
7174
7175 /* Never tailcall something if we are generating code for Thumb-1. */
7176 if (TARGET_THUMB1)
7177 return false;
7178
7179 /* The PIC register is live on entry to VxWorks PLT entries, so we
7180 must make the call before restoring the PIC register. */
7181 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7182 return false;
7183
7184 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7185 may be used both as target of the call and base register for restoring
7186 the VFP registers */
7187 if (TARGET_APCS_FRAME && TARGET_ARM
7188 && TARGET_HARD_FLOAT
7189 && decl && arm_is_long_call_p (decl))
7190 return false;
7191
7192 /* If we are interworking and the function is not declared static
7193 then we can't tail-call it unless we know that it exists in this
7194 compilation unit (since it might be a Thumb routine). */
7195 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7196 && !TREE_ASM_WRITTEN (decl))
7197 return false;
7198
7199 func_type = arm_current_func_type ();
7200 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7201 if (IS_INTERRUPT (func_type))
7202 return false;
7203
7204 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7205 generated for entry functions themselves. */
7206 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7207 return false;
7208
7209 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7210 this would complicate matters for later code generation. */
7211 if (TREE_CODE (exp) == CALL_EXPR)
7212 {
7213 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7214 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7215 return false;
7216 }
7217
7218 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7219 {
7220 /* Check that the return value locations are the same. For
7221 example that we aren't returning a value from the sibling in
7222 a VFP register but then need to transfer it to a core
7223 register. */
7224 rtx a, b;
7225 tree decl_or_type = decl;
7226
7227 /* If it is an indirect function pointer, get the function type. */
7228 if (!decl)
7229 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7230
7231 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7232 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7233 cfun->decl, false);
7234 if (!rtx_equal_p (a, b))
7235 return false;
7236 }
7237
7238 /* Never tailcall if function may be called with a misaligned SP. */
7239 if (IS_STACKALIGN (func_type))
7240 return false;
7241
7242 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7243 references should become a NOP. Don't convert such calls into
7244 sibling calls. */
7245 if (TARGET_AAPCS_BASED
7246 && arm_abi == ARM_ABI_AAPCS
7247 && decl
7248 && DECL_WEAK (decl))
7249 return false;
7250
7251 /* We cannot do a tailcall for an indirect call by descriptor if all the
7252 argument registers are used because the only register left to load the
7253 address is IP and it will already contain the static chain. */
7254 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7255 {
7256 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7257 CUMULATIVE_ARGS cum;
7258 cumulative_args_t cum_v;
7259
7260 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7261 cum_v = pack_cumulative_args (&cum);
7262
7263 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7264 {
7265 tree type = TREE_VALUE (t);
7266 if (!VOID_TYPE_P (type))
7267 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7268 }
7269
7270 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7271 return false;
7272 }
7273
7274 /* Everything else is ok. */
7275 return true;
7276 }
7277
7278 \f
7279 /* Addressing mode support functions. */
7280
7281 /* Return nonzero if X is a legitimate immediate operand when compiling
7282 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7283 int
7284 legitimate_pic_operand_p (rtx x)
7285 {
7286 if (GET_CODE (x) == SYMBOL_REF
7287 || (GET_CODE (x) == CONST
7288 && GET_CODE (XEXP (x, 0)) == PLUS
7289 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7290 return 0;
7291
7292 return 1;
7293 }
7294
7295 /* Record that the current function needs a PIC register. Initialize
7296 cfun->machine->pic_reg if we have not already done so. */
7297
7298 static void
7299 require_pic_register (void)
7300 {
7301 /* A lot of the logic here is made obscure by the fact that this
7302 routine gets called as part of the rtx cost estimation process.
7303 We don't want those calls to affect any assumptions about the real
7304 function; and further, we can't call entry_of_function() until we
7305 start the real expansion process. */
7306 if (!crtl->uses_pic_offset_table)
7307 {
7308 gcc_assert (can_create_pseudo_p ());
7309 if (arm_pic_register != INVALID_REGNUM
7310 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7311 {
7312 if (!cfun->machine->pic_reg)
7313 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7314
7315 /* Play games to avoid marking the function as needing pic
7316 if we are being called as part of the cost-estimation
7317 process. */
7318 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7319 crtl->uses_pic_offset_table = 1;
7320 }
7321 else
7322 {
7323 rtx_insn *seq, *insn;
7324
7325 if (!cfun->machine->pic_reg)
7326 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7327
7328 /* Play games to avoid marking the function as needing pic
7329 if we are being called as part of the cost-estimation
7330 process. */
7331 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7332 {
7333 crtl->uses_pic_offset_table = 1;
7334 start_sequence ();
7335
7336 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7337 && arm_pic_register > LAST_LO_REGNUM)
7338 emit_move_insn (cfun->machine->pic_reg,
7339 gen_rtx_REG (Pmode, arm_pic_register));
7340 else
7341 arm_load_pic_register (0UL);
7342
7343 seq = get_insns ();
7344 end_sequence ();
7345
7346 for (insn = seq; insn; insn = NEXT_INSN (insn))
7347 if (INSN_P (insn))
7348 INSN_LOCATION (insn) = prologue_location;
7349
7350 /* We can be called during expansion of PHI nodes, where
7351 we can't yet emit instructions directly in the final
7352 insn stream. Queue the insns on the entry edge, they will
7353 be committed after everything else is expanded. */
7354 insert_insn_on_edge (seq,
7355 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7356 }
7357 }
7358 }
7359 }
7360
7361 rtx
7362 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7363 {
7364 if (GET_CODE (orig) == SYMBOL_REF
7365 || GET_CODE (orig) == LABEL_REF)
7366 {
7367 if (reg == 0)
7368 {
7369 gcc_assert (can_create_pseudo_p ());
7370 reg = gen_reg_rtx (Pmode);
7371 }
7372
7373 /* VxWorks does not impose a fixed gap between segments; the run-time
7374 gap can be different from the object-file gap. We therefore can't
7375 use GOTOFF unless we are absolutely sure that the symbol is in the
7376 same segment as the GOT. Unfortunately, the flexibility of linker
7377 scripts means that we can't be sure of that in general, so assume
7378 that GOTOFF is never valid on VxWorks. */
7379 /* References to weak symbols cannot be resolved locally: they
7380 may be overridden by a non-weak definition at link time. */
7381 rtx_insn *insn;
7382 if ((GET_CODE (orig) == LABEL_REF
7383 || (GET_CODE (orig) == SYMBOL_REF
7384 && SYMBOL_REF_LOCAL_P (orig)
7385 && (SYMBOL_REF_DECL (orig)
7386 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7387 && NEED_GOT_RELOC
7388 && arm_pic_data_is_text_relative)
7389 insn = arm_pic_static_addr (orig, reg);
7390 else
7391 {
7392 rtx pat;
7393 rtx mem;
7394
7395 /* If this function doesn't have a pic register, create one now. */
7396 require_pic_register ();
7397
7398 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7399
7400 /* Make the MEM as close to a constant as possible. */
7401 mem = SET_SRC (pat);
7402 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7403 MEM_READONLY_P (mem) = 1;
7404 MEM_NOTRAP_P (mem) = 1;
7405
7406 insn = emit_insn (pat);
7407 }
7408
7409 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7410 by loop. */
7411 set_unique_reg_note (insn, REG_EQUAL, orig);
7412
7413 return reg;
7414 }
7415 else if (GET_CODE (orig) == CONST)
7416 {
7417 rtx base, offset;
7418
7419 if (GET_CODE (XEXP (orig, 0)) == PLUS
7420 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7421 return orig;
7422
7423 /* Handle the case where we have: const (UNSPEC_TLS). */
7424 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7425 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7426 return orig;
7427
7428 /* Handle the case where we have:
7429 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7430 CONST_INT. */
7431 if (GET_CODE (XEXP (orig, 0)) == PLUS
7432 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7433 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7434 {
7435 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7436 return orig;
7437 }
7438
7439 if (reg == 0)
7440 {
7441 gcc_assert (can_create_pseudo_p ());
7442 reg = gen_reg_rtx (Pmode);
7443 }
7444
7445 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7446
7447 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7448 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7449 base == reg ? 0 : reg);
7450
7451 if (CONST_INT_P (offset))
7452 {
7453 /* The base register doesn't really matter, we only want to
7454 test the index for the appropriate mode. */
7455 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7456 {
7457 gcc_assert (can_create_pseudo_p ());
7458 offset = force_reg (Pmode, offset);
7459 }
7460
7461 if (CONST_INT_P (offset))
7462 return plus_constant (Pmode, base, INTVAL (offset));
7463 }
7464
7465 if (GET_MODE_SIZE (mode) > 4
7466 && (GET_MODE_CLASS (mode) == MODE_INT
7467 || TARGET_SOFT_FLOAT))
7468 {
7469 emit_insn (gen_addsi3 (reg, base, offset));
7470 return reg;
7471 }
7472
7473 return gen_rtx_PLUS (Pmode, base, offset);
7474 }
7475
7476 return orig;
7477 }
7478
7479
7480 /* Find a spare register to use during the prolog of a function. */
7481
7482 static int
7483 thumb_find_work_register (unsigned long pushed_regs_mask)
7484 {
7485 int reg;
7486
7487 /* Check the argument registers first as these are call-used. The
7488 register allocation order means that sometimes r3 might be used
7489 but earlier argument registers might not, so check them all. */
7490 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7491 if (!df_regs_ever_live_p (reg))
7492 return reg;
7493
7494 /* Before going on to check the call-saved registers we can try a couple
7495 more ways of deducing that r3 is available. The first is when we are
7496 pushing anonymous arguments onto the stack and we have less than 4
7497 registers worth of fixed arguments(*). In this case r3 will be part of
7498 the variable argument list and so we can be sure that it will be
7499 pushed right at the start of the function. Hence it will be available
7500 for the rest of the prologue.
7501 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7502 if (cfun->machine->uses_anonymous_args
7503 && crtl->args.pretend_args_size > 0)
7504 return LAST_ARG_REGNUM;
7505
7506 /* The other case is when we have fixed arguments but less than 4 registers
7507 worth. In this case r3 might be used in the body of the function, but
7508 it is not being used to convey an argument into the function. In theory
7509 we could just check crtl->args.size to see how many bytes are
7510 being passed in argument registers, but it seems that it is unreliable.
7511 Sometimes it will have the value 0 when in fact arguments are being
7512 passed. (See testcase execute/20021111-1.c for an example). So we also
7513 check the args_info.nregs field as well. The problem with this field is
7514 that it makes no allowances for arguments that are passed to the
7515 function but which are not used. Hence we could miss an opportunity
7516 when a function has an unused argument in r3. But it is better to be
7517 safe than to be sorry. */
7518 if (! cfun->machine->uses_anonymous_args
7519 && crtl->args.size >= 0
7520 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7521 && (TARGET_AAPCS_BASED
7522 ? crtl->args.info.aapcs_ncrn < 4
7523 : crtl->args.info.nregs < 4))
7524 return LAST_ARG_REGNUM;
7525
7526 /* Otherwise look for a call-saved register that is going to be pushed. */
7527 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7528 if (pushed_regs_mask & (1 << reg))
7529 return reg;
7530
7531 if (TARGET_THUMB2)
7532 {
7533 /* Thumb-2 can use high regs. */
7534 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7535 if (pushed_regs_mask & (1 << reg))
7536 return reg;
7537 }
7538 /* Something went wrong - thumb_compute_save_reg_mask()
7539 should have arranged for a suitable register to be pushed. */
7540 gcc_unreachable ();
7541 }
7542
7543 static GTY(()) int pic_labelno;
7544
7545 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7546 low register. */
7547
7548 void
7549 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7550 {
7551 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7552
7553 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7554 return;
7555
7556 gcc_assert (flag_pic);
7557
7558 pic_reg = cfun->machine->pic_reg;
7559 if (TARGET_VXWORKS_RTP)
7560 {
7561 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7562 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7563 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7564
7565 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7566
7567 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7568 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7569 }
7570 else
7571 {
7572 /* We use an UNSPEC rather than a LABEL_REF because this label
7573 never appears in the code stream. */
7574
7575 labelno = GEN_INT (pic_labelno++);
7576 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7577 l1 = gen_rtx_CONST (VOIDmode, l1);
7578
7579 /* On the ARM the PC register contains 'dot + 8' at the time of the
7580 addition, on the Thumb it is 'dot + 4'. */
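/* Worked example (illustrative): if the UNSPEC_PIC_LABEL point ends up at
   address L and the GOT at address G, the constant built below is
   G - (L + 8) in ARM state; adding the PC, which reads as L + 8 at the
   pic_add insn, yields exactly G.  */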
7581 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7582 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7583 UNSPEC_GOTSYM_OFF);
7584 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7585
7586 if (TARGET_32BIT)
7587 {
7588 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7589 }
7590 else /* TARGET_THUMB1 */
7591 {
7592 if (arm_pic_register != INVALID_REGNUM
7593 && REGNO (pic_reg) > LAST_LO_REGNUM)
7594 {
7595 /* We will have pushed the pic register, so we should always be
7596 able to find a work register. */
7597 pic_tmp = gen_rtx_REG (SImode,
7598 thumb_find_work_register (saved_regs));
7599 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7600 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7601 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7602 }
7603 else if (arm_pic_register != INVALID_REGNUM
7604 && arm_pic_register > LAST_LO_REGNUM
7605 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7606 {
7607 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7608 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7609 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7610 }
7611 else
7612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7613 }
7614 }
7615
7616 /* Need to emit this whether or not we obey regdecls,
7617 since setjmp/longjmp can cause life info to screw up. */
7618 emit_use (pic_reg);
7619 }
7620
7621 /* Generate code to load the address of a static var when flag_pic is set. */
7622 static rtx_insn *
7623 arm_pic_static_addr (rtx orig, rtx reg)
7624 {
7625 rtx l1, labelno, offset_rtx;
7626
7627 gcc_assert (flag_pic);
7628
7629 /* We use an UNSPEC rather than a LABEL_REF because this label
7630 never appears in the code stream. */
7631 labelno = GEN_INT (pic_labelno++);
7632 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7633 l1 = gen_rtx_CONST (VOIDmode, l1);
7634
7635 /* On the ARM the PC register contains 'dot + 8' at the time of the
7636 addition, on the Thumb it is 'dot + 4'. */
7637 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7638 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7639 UNSPEC_SYMBOL_OFFSET);
7640 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7641
7642 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7643 }
7644
7645 /* Return nonzero if X is valid as an ARM state addressing register. */
7646 static int
7647 arm_address_register_rtx_p (rtx x, int strict_p)
7648 {
7649 int regno;
7650
7651 if (!REG_P (x))
7652 return 0;
7653
7654 regno = REGNO (x);
7655
7656 if (strict_p)
7657 return ARM_REGNO_OK_FOR_BASE_P (regno);
7658
7659 return (regno <= LAST_ARM_REGNUM
7660 || regno >= FIRST_PSEUDO_REGISTER
7661 || regno == FRAME_POINTER_REGNUM
7662 || regno == ARG_POINTER_REGNUM);
7663 }
7664
7665 /* Return TRUE if this rtx is the difference of a symbol and a label,
7666 and will reduce to a PC-relative relocation in the object file.
7667 Expressions like this can be left alone when generating PIC, rather
7668 than forced through the GOT. */
7669 static int
7670 pcrel_constant_p (rtx x)
7671 {
7672 if (GET_CODE (x) == MINUS)
7673 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7674
7675 return FALSE;
7676 }
7677
7678 /* Return true if X will surely end up in an index register after next
7679 splitting pass. */
7680 static bool
7681 will_be_in_index_register (const_rtx x)
7682 {
7683 /* arm.md: calculate_pic_address will split this into a register. */
7684 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7685 }
7686
7687 /* Return nonzero if X is a valid ARM state address operand. */
7688 int
7689 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7690 int strict_p)
7691 {
7692 bool use_ldrd;
7693 enum rtx_code code = GET_CODE (x);
7694
7695 if (arm_address_register_rtx_p (x, strict_p))
7696 return 1;
7697
7698 use_ldrd = (TARGET_LDRD
7699 && (mode == DImode || mode == DFmode));
7700
7701 if (code == POST_INC || code == PRE_DEC
7702 || ((code == PRE_INC || code == POST_DEC)
7703 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7704 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7705
7706 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7707 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7708 && GET_CODE (XEXP (x, 1)) == PLUS
7709 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7710 {
7711 rtx addend = XEXP (XEXP (x, 1), 1);
7712
7713 /* Don't allow ldrd post increment by register because it's hard
7714 to fixup invalid register choices. */
7715 if (use_ldrd
7716 && GET_CODE (x) == POST_MODIFY
7717 && REG_P (addend))
7718 return 0;
7719
7720 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7721 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7722 }
7723
7724 /* After reload constants split into minipools will have addresses
7725 from a LABEL_REF. */
7726 else if (reload_completed
7727 && (code == LABEL_REF
7728 || (code == CONST
7729 && GET_CODE (XEXP (x, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7731 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7732 return 1;
7733
7734 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7735 return 0;
7736
7737 else if (code == PLUS)
7738 {
7739 rtx xop0 = XEXP (x, 0);
7740 rtx xop1 = XEXP (x, 1);
7741
7742 return ((arm_address_register_rtx_p (xop0, strict_p)
7743 && ((CONST_INT_P (xop1)
7744 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7745 || (!strict_p && will_be_in_index_register (xop1))))
7746 || (arm_address_register_rtx_p (xop1, strict_p)
7747 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7748 }
7749
7750 #if 0
7751 /* Reload currently can't handle MINUS, so disable this for now */
7752 else if (GET_CODE (x) == MINUS)
7753 {
7754 rtx xop0 = XEXP (x, 0);
7755 rtx xop1 = XEXP (x, 1);
7756
7757 return (arm_address_register_rtx_p (xop0, strict_p)
7758 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7759 }
7760 #endif
7761
7762 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7763 && code == SYMBOL_REF
7764 && CONSTANT_POOL_ADDRESS_P (x)
7765 && ! (flag_pic
7766 && symbol_mentioned_p (get_pool_constant (x))
7767 && ! pcrel_constant_p (get_pool_constant (x))))
7768 return 1;
7769
7770 return 0;
7771 }
7772
7773 /* Return true if we can avoid creating a constant pool entry for x. */
7774 static bool
7775 can_avoid_literal_pool_for_label_p (rtx x)
7776 {
7777 /* Normally we can assign constant values to target registers without
7778 the help of the constant pool. But there are cases where we have to use
7779 the constant pool, such as:
7780 1) assigning a label to a register.
7781 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7782
7783 A constant pool access of the form:
7784 (set (reg r0) (mem (symbol_ref (".LC0"))))
7785 will cause a literal pool to be used (later, in function arm_reorg).
7786 So here we mark this form as invalid; the compiler will then adjust it
7787 into:
7788 (set (reg r0) (symbol_ref (".LC0")))
7789 (set (reg r0) (mem (reg r0))).
7790 No extra register is required, and (mem (reg r0)) won't cause the use
7791 of literal pools. */
7792 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7793 && CONSTANT_POOL_ADDRESS_P (x))
7794 return 1;
7795 return 0;
7796 }
7797
7798
7799 /* Return nonzero if X is a valid Thumb-2 address operand. */
7800 static int
7801 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7802 {
7803 bool use_ldrd;
7804 enum rtx_code code = GET_CODE (x);
7805
7806 if (arm_address_register_rtx_p (x, strict_p))
7807 return 1;
7808
7809 use_ldrd = (TARGET_LDRD
7810 && (mode == DImode || mode == DFmode));
7811
7812 if (code == POST_INC || code == PRE_DEC
7813 || ((code == PRE_INC || code == POST_DEC)
7814 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7815 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7816
7817 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7818 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7819 && GET_CODE (XEXP (x, 1)) == PLUS
7820 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7821 {
7822 /* Thumb-2 only has autoincrement by constant. */
7823 rtx addend = XEXP (XEXP (x, 1), 1);
7824 HOST_WIDE_INT offset;
7825
7826 if (!CONST_INT_P (addend))
7827 return 0;
7828
7829 offset = INTVAL(addend);
7830 if (GET_MODE_SIZE (mode) <= 4)
7831 return (offset > -256 && offset < 256);
7832
7833 return (use_ldrd && offset > -1024 && offset < 1024
7834 && (offset & 3) == 0);
7835 }
7836
7837 /* After reload constants split into minipools will have addresses
7838 from a LABEL_REF. */
7839 else if (reload_completed
7840 && (code == LABEL_REF
7841 || (code == CONST
7842 && GET_CODE (XEXP (x, 0)) == PLUS
7843 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7844 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7845 return 1;
7846
7847 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7848 return 0;
7849
7850 else if (code == PLUS)
7851 {
7852 rtx xop0 = XEXP (x, 0);
7853 rtx xop1 = XEXP (x, 1);
7854
7855 return ((arm_address_register_rtx_p (xop0, strict_p)
7856 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7857 || (!strict_p && will_be_in_index_register (xop1))))
7858 || (arm_address_register_rtx_p (xop1, strict_p)
7859 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7860 }
7861
7862 else if (can_avoid_literal_pool_for_label_p (x))
7863 return 0;
7864
7865 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7866 && code == SYMBOL_REF
7867 && CONSTANT_POOL_ADDRESS_P (x)
7868 && ! (flag_pic
7869 && symbol_mentioned_p (get_pool_constant (x))
7870 && ! pcrel_constant_p (get_pool_constant (x))))
7871 return 1;
7872
7873 return 0;
7874 }
7875
7876 /* Return nonzero if INDEX is valid for an address index operand in
7877 ARM state. */
7878 static int
7879 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7880 int strict_p)
7881 {
7882 HOST_WIDE_INT range;
7883 enum rtx_code code = GET_CODE (index);
7884
7885 /* Standard coprocessor addressing modes. */
7886 if (TARGET_HARD_FLOAT
7887 && (mode == SFmode || mode == DFmode))
7888 return (code == CONST_INT && INTVAL (index) < 1024
7889 && INTVAL (index) > -1024
7890 && (INTVAL (index) & 3) == 0);
7891
7892 /* For quad modes, we restrict the constant offset to be slightly less
7893 than what the instruction format permits. We do this because for
7894 quad mode moves, we will actually decompose them into two separate
7895 double-mode reads or writes. INDEX must therefore be a valid
7896 (double-mode) offset and so should INDEX+8. */
7897 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7898 return (code == CONST_INT
7899 && INTVAL (index) < 1016
7900 && INTVAL (index) > -1024
7901 && (INTVAL (index) & 3) == 0);
7902
7903 /* We have no such constraint on double mode offsets, so we permit the
7904 full range of the instruction format. */
7905 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7906 return (code == CONST_INT
7907 && INTVAL (index) < 1024
7908 && INTVAL (index) > -1024
7909 && (INTVAL (index) & 3) == 0);
7910
7911 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7912 return (code == CONST_INT
7913 && INTVAL (index) < 1024
7914 && INTVAL (index) > -1024
7915 && (INTVAL (index) & 3) == 0);
7916
7917 if (arm_address_register_rtx_p (index, strict_p)
7918 && (GET_MODE_SIZE (mode) <= 4))
7919 return 1;
7920
7921 if (mode == DImode || mode == DFmode)
7922 {
7923 if (code == CONST_INT)
7924 {
7925 HOST_WIDE_INT val = INTVAL (index);
7926
7927 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7928 If vldr is selected it uses arm_coproc_mem_operand. */
7929 if (TARGET_LDRD)
7930 return val > -256 && val < 256;
7931 else
7932 return val > -4096 && val < 4092;
7933 }
7934
7935 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7936 }
7937
7938 if (GET_MODE_SIZE (mode) <= 4
7939 && ! (arm_arch4
7940 && (mode == HImode
7941 || mode == HFmode
7942 || (mode == QImode && outer == SIGN_EXTEND))))
7943 {
7944 if (code == MULT)
7945 {
7946 rtx xiop0 = XEXP (index, 0);
7947 rtx xiop1 = XEXP (index, 1);
7948
7949 return ((arm_address_register_rtx_p (xiop0, strict_p)
7950 && power_of_two_operand (xiop1, SImode))
7951 || (arm_address_register_rtx_p (xiop1, strict_p)
7952 && power_of_two_operand (xiop0, SImode)));
7953 }
7954 else if (code == LSHIFTRT || code == ASHIFTRT
7955 || code == ASHIFT || code == ROTATERT)
7956 {
7957 rtx op = XEXP (index, 1);
7958
7959 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7960 && CONST_INT_P (op)
7961 && INTVAL (op) > 0
7962 && INTVAL (op) <= 31);
7963 }
7964 }
7965
7966 /* For ARM v4 we may be doing a sign-extend operation during the
7967 load. */
7968 if (arm_arch4)
7969 {
7970 if (mode == HImode
7971 || mode == HFmode
7972 || (outer == SIGN_EXTEND && mode == QImode))
7973 range = 256;
7974 else
7975 range = 4096;
7976 }
7977 else
7978 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7979
7980 return (code == CONST_INT
7981 && INTVAL (index) < range
7982 && INTVAL (index) > -range);
7983 }
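/* Illustrative examples for the checks above: in ARM state a word access can
   use a scaled register index such as

     (plus (reg r0) (mult (reg r1) (const_int 4)))     ; i.e. [r0, r1, LSL #2]

   while, from ARMv4 on, an HImode access is limited to a plain register
   index or an immediate offset with |offset| < 256.  */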
7984
7985 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7986 index operand. i.e. 1, 2, 4 or 8. */
7987 static bool
7988 thumb2_index_mul_operand (rtx op)
7989 {
7990 HOST_WIDE_INT val;
7991
7992 if (!CONST_INT_P (op))
7993 return false;
7994
7995 val = INTVAL(op);
7996 return (val == 1 || val == 2 || val == 4 || val == 8);
7997 }
7998
7999 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8000 static int
8001 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8002 {
8003 enum rtx_code code = GET_CODE (index);
8004
8005 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8006 /* Standard coprocessor addressing modes. */
8007 if (TARGET_HARD_FLOAT
8008 && (mode == SFmode || mode == DFmode))
8009 return (code == CONST_INT && INTVAL (index) < 1024
8010 /* Thumb-2 allows only > -256 index range for its core register
8011 load/stores. Since we allow SF/DF in core registers, we have
8012 to use the intersection between -256~4096 (core) and -1024~1024
8013 (coprocessor). */
8014 && INTVAL (index) > -256
8015 && (INTVAL (index) & 3) == 0);
8016
8017 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8018 {
8019 /* For DImode assume values will usually live in core regs
8020 and only allow LDRD addressing modes. */
8021 if (!TARGET_LDRD || mode != DImode)
8022 return (code == CONST_INT
8023 && INTVAL (index) < 1024
8024 && INTVAL (index) > -1024
8025 && (INTVAL (index) & 3) == 0);
8026 }
8027
8028 /* For quad modes, we restrict the constant offset to be slightly less
8029 than what the instruction format permits. We do this because for
8030 quad mode moves, we will actually decompose them into two separate
8031 double-mode reads or writes. INDEX must therefore be a valid
8032 (double-mode) offset and so should INDEX+8. */
8033 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8034 return (code == CONST_INT
8035 && INTVAL (index) < 1016
8036 && INTVAL (index) > -1024
8037 && (INTVAL (index) & 3) == 0);
8038
8039 /* We have no such constraint on double mode offsets, so we permit the
8040 full range of the instruction format. */
8041 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8046
8047 if (arm_address_register_rtx_p (index, strict_p)
8048 && (GET_MODE_SIZE (mode) <= 4))
8049 return 1;
8050
8051 if (mode == DImode || mode == DFmode)
8052 {
8053 if (code == CONST_INT)
8054 {
8055 HOST_WIDE_INT val = INTVAL (index);
8056 /* Thumb-2 ldrd only has reg+const addressing modes.
8057 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8058 If vldr is selected it uses arm_coproc_mem_operand. */
8059 if (TARGET_LDRD)
8060 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8061 else
8062 return IN_RANGE (val, -255, 4095 - 4);
8063 }
8064 else
8065 return 0;
8066 }
8067
8068 if (code == MULT)
8069 {
8070 rtx xiop0 = XEXP (index, 0);
8071 rtx xiop1 = XEXP (index, 1);
8072
8073 return ((arm_address_register_rtx_p (xiop0, strict_p)
8074 && thumb2_index_mul_operand (xiop1))
8075 || (arm_address_register_rtx_p (xiop1, strict_p)
8076 && thumb2_index_mul_operand (xiop0)));
8077 }
8078 else if (code == ASHIFT)
8079 {
8080 rtx op = XEXP (index, 1);
8081
8082 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8083 && CONST_INT_P (op)
8084 && INTVAL (op) > 0
8085 && INTVAL (op) <= 3);
8086 }
8087
8088 return (code == CONST_INT
8089 && INTVAL (index) < 4096
8090 && INTVAL (index) > -256);
8091 }
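/* Illustrative example: for a Thumb-2 SImode access the address

     (plus (reg r1) (mult (reg r2) (const_int 4)))

   is accepted above (the scaled-register form, scale 1, 2, 4 or 8), whereas
   a scale factor of 16, or a constant offset outside the -255..4095 range,
   is rejected and must be computed into a register first.  */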
8092
8093 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8094 static int
8095 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8096 {
8097 int regno;
8098
8099 if (!REG_P (x))
8100 return 0;
8101
8102 regno = REGNO (x);
8103
8104 if (strict_p)
8105 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8106
8107 return (regno <= LAST_LO_REGNUM
8108 || regno > LAST_VIRTUAL_REGISTER
8109 || regno == FRAME_POINTER_REGNUM
8110 || (GET_MODE_SIZE (mode) >= 4
8111 && (regno == STACK_POINTER_REGNUM
8112 || regno >= FIRST_PSEUDO_REGISTER
8113 || x == hard_frame_pointer_rtx
8114 || x == arg_pointer_rtx)));
8115 }
8116
8117 /* Return nonzero if x is a legitimate index register. This is the case
8118 for any base register that can access a QImode object. */
8119 inline static int
8120 thumb1_index_register_rtx_p (rtx x, int strict_p)
8121 {
8122 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8123 }
8124
8125 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8126
8127 The AP may be eliminated to either the SP or the FP, so we use the
8128 least common denominator, e.g. SImode, and offsets from 0 to 64.
8129
8130 ??? Verify whether the above is the right approach.
8131
8132 ??? Also, the FP may be eliminated to the SP, so perhaps that
8133 needs special handling also.
8134
8135 ??? Look at how the mips16 port solves this problem. It probably uses
8136 better ways to solve some of these problems.
8137
8138 Although it is not incorrect, we don't accept QImode and HImode
8139 addresses based on the frame pointer or arg pointer until the
8140 reload pass starts. This is so that eliminating such addresses
8141 into stack based ones won't produce impossible code. */
8142 int
8143 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8144 {
8145 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8146 return 0;
8147
8148 /* ??? Not clear if this is right. Experiment. */
8149 if (GET_MODE_SIZE (mode) < 4
8150 && !(reload_in_progress || reload_completed)
8151 && (reg_mentioned_p (frame_pointer_rtx, x)
8152 || reg_mentioned_p (arg_pointer_rtx, x)
8153 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8154 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8155 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8156 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8157 return 0;
8158
8159 /* Accept any base register. SP only in SImode or larger. */
8160 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8161 return 1;
8162
8163 /* This is PC relative data before arm_reorg runs. */
8164 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8165 && GET_CODE (x) == SYMBOL_REF
8166 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8167 return 1;
8168
8169 /* This is PC relative data after arm_reorg runs. */
8170 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8171 && reload_completed
8172 && (GET_CODE (x) == LABEL_REF
8173 || (GET_CODE (x) == CONST
8174 && GET_CODE (XEXP (x, 0)) == PLUS
8175 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8176 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8177 return 1;
8178
8179 /* Post-inc indexing only supported for SImode and larger. */
8180 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8181 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8182 return 1;
8183
8184 else if (GET_CODE (x) == PLUS)
8185 {
8186 /* REG+REG address can be any two index registers. */
8187 /* We disallow FRAME+REG addressing since we know that FRAME
8188 will be replaced with STACK, and SP relative addressing only
8189 permits SP+OFFSET. */
8190 if (GET_MODE_SIZE (mode) <= 4
8191 && XEXP (x, 0) != frame_pointer_rtx
8192 && XEXP (x, 1) != frame_pointer_rtx
8193 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8194 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8195 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8196 return 1;
8197
8198 /* REG+const has 5-7 bit offset for non-SP registers. */
8199 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8200 || XEXP (x, 0) == arg_pointer_rtx)
8201 && CONST_INT_P (XEXP (x, 1))
8202 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8203 return 1;
8204
8205 /* REG+const has 10-bit offset for SP, but only SImode and
8206 larger is supported. */
8207 /* ??? Should probably check for DI/DFmode overflow here
8208 just like GO_IF_LEGITIMATE_OFFSET does. */
8209 else if (REG_P (XEXP (x, 0))
8210 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8211 && GET_MODE_SIZE (mode) >= 4
8212 && CONST_INT_P (XEXP (x, 1))
8213 && INTVAL (XEXP (x, 1)) >= 0
8214 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8215 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8216 return 1;
8217
8218 else if (REG_P (XEXP (x, 0))
8219 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8220 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8221 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8222 && REGNO (XEXP (x, 0))
8223 <= LAST_VIRTUAL_POINTER_REGISTER))
8224 && GET_MODE_SIZE (mode) >= 4
8225 && CONST_INT_P (XEXP (x, 1))
8226 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8227 return 1;
8228 }
8229
8230 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8231 && GET_MODE_SIZE (mode) == 4
8232 && GET_CODE (x) == SYMBOL_REF
8233 && CONSTANT_POOL_ADDRESS_P (x)
8234 && ! (flag_pic
8235 && symbol_mentioned_p (get_pool_constant (x))
8236 && ! pcrel_constant_p (get_pool_constant (x))))
8237 return 1;
8238
8239 return 0;
8240 }
8241
8242 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8243 instruction of mode MODE. */
8244 int
8245 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8246 {
8247 switch (GET_MODE_SIZE (mode))
8248 {
8249 case 1:
8250 return val >= 0 && val < 32;
8251
8252 case 2:
8253 return val >= 0 && val < 64 && (val & 1) == 0;
8254
8255 default:
8256 return (val >= 0
8257 && (val + GET_MODE_SIZE (mode)) <= 128
8258 && (val & 3) == 0);
8259 }
8260 }
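/* Worked example (illustrative): for SImode (4 bytes) the valid offsets are
   0, 4, ..., 124; for HImode 0, 2, ..., 62; and for QImode 0..31 -- i.e. the
   5-bit (scaled) immediate offsets of the 16-bit Thumb load/store
   encodings.  */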
8261
8262 bool
8263 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8264 {
8265 if (TARGET_ARM)
8266 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8267 else if (TARGET_THUMB2)
8268 return thumb2_legitimate_address_p (mode, x, strict_p);
8269 else /* if (TARGET_THUMB1) */
8270 return thumb1_legitimate_address_p (mode, x, strict_p);
8271 }
8272
8273 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8274
8275 Given an rtx X being reloaded into a reg required to be
8276 in class CLASS, return the class of reg to actually use.
8277 In general this is just CLASS, but for the Thumb core registers and
8278 immediate constants we prefer a LO_REGS class or a subset. */
8279
8280 static reg_class_t
8281 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8282 {
8283 if (TARGET_32BIT)
8284 return rclass;
8285 else
8286 {
8287 if (rclass == GENERAL_REGS)
8288 return LO_REGS;
8289 else
8290 return rclass;
8291 }
8292 }
8293
8294 /* Build the SYMBOL_REF for __tls_get_addr. */
8295
8296 static GTY(()) rtx tls_get_addr_libfunc;
8297
8298 static rtx
8299 get_tls_get_addr (void)
8300 {
8301 if (!tls_get_addr_libfunc)
8302 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8303 return tls_get_addr_libfunc;
8304 }
8305
8306 rtx
8307 arm_load_tp (rtx target)
8308 {
8309 if (!target)
8310 target = gen_reg_rtx (SImode);
8311
8312 if (TARGET_HARD_TP)
8313 {
8314 /* Can return in any reg. */
8315 emit_insn (gen_load_tp_hard (target));
8316 }
8317 else
8318 {
8319 /* Always returned in r0. Immediately copy the result into a pseudo,
8320 otherwise other uses of r0 (e.g. setting up function arguments) may
8321 clobber the value. */
8322
8323 rtx tmp;
8324
8325 emit_insn (gen_load_tp_soft ());
8326
8327 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8328 emit_move_insn (target, tmp);
8329 }
8330 return target;
8331 }
8332
8333 static rtx
8334 load_tls_operand (rtx x, rtx reg)
8335 {
8336 rtx tmp;
8337
8338 if (reg == NULL_RTX)
8339 reg = gen_reg_rtx (SImode);
8340
8341 tmp = gen_rtx_CONST (SImode, x);
8342
8343 emit_move_insn (reg, tmp);
8344
8345 return reg;
8346 }
8347
8348 static rtx_insn *
8349 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8350 {
8351 rtx label, labelno, sum;
8352
8353 gcc_assert (reloc != TLS_DESCSEQ);
8354 start_sequence ();
8355
8356 labelno = GEN_INT (pic_labelno++);
8357 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8358 label = gen_rtx_CONST (VOIDmode, label);
8359
8360 sum = gen_rtx_UNSPEC (Pmode,
8361 gen_rtvec (4, x, GEN_INT (reloc), label,
8362 GEN_INT (TARGET_ARM ? 8 : 4)),
8363 UNSPEC_TLS);
8364 reg = load_tls_operand (sum, reg);
8365
8366 if (TARGET_ARM)
8367 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8368 else
8369 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8370
8371 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8372 LCT_PURE, /* LCT_CONST? */
8373 Pmode, reg, Pmode);
8374
8375 rtx_insn *insns = get_insns ();
8376 end_sequence ();
8377
8378 return insns;
8379 }
8380
8381 static rtx
8382 arm_tls_descseq_addr (rtx x, rtx reg)
8383 {
8384 rtx labelno = GEN_INT (pic_labelno++);
8385 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8386 rtx sum = gen_rtx_UNSPEC (Pmode,
8387 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8388 gen_rtx_CONST (VOIDmode, label),
8389 GEN_INT (!TARGET_ARM)),
8390 UNSPEC_TLS);
8391 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8392
8393 emit_insn (gen_tlscall (x, labelno));
8394 if (!reg)
8395 reg = gen_reg_rtx (SImode);
8396 else
8397 gcc_assert (REGNO (reg) != R0_REGNUM);
8398
8399 emit_move_insn (reg, reg0);
8400
8401 return reg;
8402 }
8403
8404 rtx
8405 legitimize_tls_address (rtx x, rtx reg)
8406 {
8407 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8408 rtx_insn *insns;
8409 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8410
8411 switch (model)
8412 {
8413 case TLS_MODEL_GLOBAL_DYNAMIC:
8414 if (TARGET_GNU2_TLS)
8415 {
8416 reg = arm_tls_descseq_addr (x, reg);
8417
8418 tp = arm_load_tp (NULL_RTX);
8419
8420 dest = gen_rtx_PLUS (Pmode, tp, reg);
8421 }
8422 else
8423 {
8424 /* Original scheme */
8425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8426 dest = gen_reg_rtx (Pmode);
8427 emit_libcall_block (insns, dest, ret, x);
8428 }
8429 return dest;
8430
8431 case TLS_MODEL_LOCAL_DYNAMIC:
8432 if (TARGET_GNU2_TLS)
8433 {
8434 reg = arm_tls_descseq_addr (x, reg);
8435
8436 tp = arm_load_tp (NULL_RTX);
8437
8438 dest = gen_rtx_PLUS (Pmode, tp, reg);
8439 }
8440 else
8441 {
8442 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8443
8444 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8445 share the LDM result with other LD model accesses. */
8446 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8447 UNSPEC_TLS);
8448 dest = gen_reg_rtx (Pmode);
8449 emit_libcall_block (insns, dest, ret, eqv);
8450
8451 /* Load the addend. */
8452 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8453 GEN_INT (TLS_LDO32)),
8454 UNSPEC_TLS);
8455 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8456 dest = gen_rtx_PLUS (Pmode, dest, addend);
8457 }
8458 return dest;
8459
8460 case TLS_MODEL_INITIAL_EXEC:
8461 labelno = GEN_INT (pic_labelno++);
8462 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8463 label = gen_rtx_CONST (VOIDmode, label);
8464 sum = gen_rtx_UNSPEC (Pmode,
8465 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8466 GEN_INT (TARGET_ARM ? 8 : 4)),
8467 UNSPEC_TLS);
8468 reg = load_tls_operand (sum, reg);
8469
8470 if (TARGET_ARM)
8471 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8472 else if (TARGET_THUMB2)
8473 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8474 else
8475 {
8476 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8477 emit_move_insn (reg, gen_const_mem (SImode, reg));
8478 }
8479
8480 tp = arm_load_tp (NULL_RTX);
8481
8482 return gen_rtx_PLUS (Pmode, tp, reg);
8483
8484 case TLS_MODEL_LOCAL_EXEC:
8485 tp = arm_load_tp (NULL_RTX);
8486
8487 reg = gen_rtx_UNSPEC (Pmode,
8488 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8489 UNSPEC_TLS);
8490 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8491
8492 return gen_rtx_PLUS (Pmode, tp, reg);
8493
8494 default:
8495 abort ();
8496 }
8497 }
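/* Illustrative summary (not exhaustive; hypothetical variable name): for

     __thread int counter;

   a -fPIC shared-object access typically goes through the global-dynamic
   path above (a __tls_get_addr call, or a TLS descriptor sequence with
   -mtls-dialect=gnu2), while a non-PIC executable can use the local-exec
   path: the thread pointer from arm_load_tp plus a link-time constant
   offset (TLS_LE32).  */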
8498
8499 /* Try machine-dependent ways of modifying an illegitimate address
8500 to be legitimate. If we find one, return the new, valid address. */
8501 rtx
8502 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8503 {
8504 if (arm_tls_referenced_p (x))
8505 {
8506 rtx addend = NULL;
8507
8508 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8509 {
8510 addend = XEXP (XEXP (x, 0), 1);
8511 x = XEXP (XEXP (x, 0), 0);
8512 }
8513
8514 if (GET_CODE (x) != SYMBOL_REF)
8515 return x;
8516
8517 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8518
8519 x = legitimize_tls_address (x, NULL_RTX);
8520
8521 if (addend)
8522 {
8523 x = gen_rtx_PLUS (SImode, x, addend);
8524 orig_x = x;
8525 }
8526 else
8527 return x;
8528 }
8529
8530 if (!TARGET_ARM)
8531 {
8532 /* TODO: legitimize_address for Thumb2. */
8533 if (TARGET_THUMB2)
8534 return x;
8535 return thumb_legitimize_address (x, orig_x, mode);
8536 }
8537
8538 if (GET_CODE (x) == PLUS)
8539 {
8540 rtx xop0 = XEXP (x, 0);
8541 rtx xop1 = XEXP (x, 1);
8542
8543 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8544 xop0 = force_reg (SImode, xop0);
8545
8546 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8547 && !symbol_mentioned_p (xop1))
8548 xop1 = force_reg (SImode, xop1);
8549
8550 if (ARM_BASE_REGISTER_RTX_P (xop0)
8551 && CONST_INT_P (xop1))
8552 {
8553 HOST_WIDE_INT n, low_n;
8554 rtx base_reg, val;
8555 n = INTVAL (xop1);
8556
8557 /* VFP addressing modes actually allow greater offsets, but for
8558 now we just stick with the lowest common denominator. */
8559 if (mode == DImode || mode == DFmode)
8560 {
8561 low_n = n & 0x0f;
8562 n &= ~0x0f;
8563 if (low_n > 4)
8564 {
8565 n += 16;
8566 low_n -= 16;
8567 }
8568 }
8569 else
8570 {
8571 low_n = ((mode) == TImode ? 0
8572 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8573 n -= low_n;
8574 }
8575
8576 base_reg = gen_reg_rtx (SImode);
8577 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8578 emit_move_insn (base_reg, val);
8579 x = plus_constant (Pmode, base_reg, low_n);
8580 }
8581 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8582 x = gen_rtx_PLUS (SImode, xop0, xop1);
8583 }
8584
8585 /* XXX We don't allow MINUS any more -- see comment in
8586 arm_legitimate_address_outer_p (). */
8587 else if (GET_CODE (x) == MINUS)
8588 {
8589 rtx xop0 = XEXP (x, 0);
8590 rtx xop1 = XEXP (x, 1);
8591
8592 if (CONSTANT_P (xop0))
8593 xop0 = force_reg (SImode, xop0);
8594
8595 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8596 xop1 = force_reg (SImode, xop1);
8597
8598 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8599 x = gen_rtx_MINUS (SImode, xop0, xop1);
8600 }
8601
8602 /* Make sure to take full advantage of the pre-indexed addressing mode
8603 with absolute addresses, which often allows the base register to be
8604 factored out across multiple adjacent memory references and might
8605 even allow the minipool to be avoided entirely. */
8606 else if (CONST_INT_P (x) && optimize > 0)
8607 {
8608 unsigned int bits;
8609 HOST_WIDE_INT mask, base, index;
8610 rtx base_reg;
8611
8612 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8613 use an 8-bit index. So let's use a 12-bit index for SImode only and
8614 hope that arm_gen_constant will enable ldrb to use more bits. */
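/* Worked example (illustrative): for an SImode load of address 0x12345678,
   mask is 0xfff, so base = 0x12345000 and index = 0x678; the base is built
   in a register once and the access becomes [base_reg, #0x678], reusable by
   neighbouring references.  */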
8615 bits = (mode == SImode) ? 12 : 8;
8616 mask = (1 << bits) - 1;
8617 base = INTVAL (x) & ~mask;
8618 index = INTVAL (x) & mask;
8619 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8620 {
8621 /* It'll most probably be more efficient to generate the base
8622 with more bits set and use a negative index instead. */
8623 base |= mask;
8624 index -= mask;
8625 }
8626 base_reg = force_reg (SImode, GEN_INT (base));
8627 x = plus_constant (Pmode, base_reg, index);
8628 }
8629
8630 if (flag_pic)
8631 {
8632 /* We need to find and carefully transform any SYMBOL and LABEL
8633 references; so go back to the original address expression. */
8634 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8635
8636 if (new_x != orig_x)
8637 x = new_x;
8638 }
8639
8640 return x;
8641 }
8642
8643
8644 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8645 to be legitimate. If we find one, return the new, valid address. */
8646 rtx
8647 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8648 {
8649 if (GET_CODE (x) == PLUS
8650 && CONST_INT_P (XEXP (x, 1))
8651 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8652 || INTVAL (XEXP (x, 1)) < 0))
8653 {
8654 rtx xop0 = XEXP (x, 0);
8655 rtx xop1 = XEXP (x, 1);
8656 HOST_WIDE_INT offset = INTVAL (xop1);
8657
8658 /* Try and fold the offset into a biasing of the base register and
8659 then offsetting that. Don't do this when optimizing for space
8660 since it can cause too many CSEs. */
8661 if (optimize_size && offset >= 0
8662 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8663 {
8664 HOST_WIDE_INT delta;
8665
8666 if (offset >= 256)
8667 delta = offset - (256 - GET_MODE_SIZE (mode));
8668 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8669 delta = 31 * GET_MODE_SIZE (mode);
8670 else
8671 delta = offset & (~31 * GET_MODE_SIZE (mode));
8672
8673 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8674 NULL_RTX);
8675 x = plus_constant (Pmode, xop0, delta);
8676 }
8677 else if (offset < 0 && offset > -256)
8678 /* Small negative offsets are best done with a subtract before the
8679 dereference; forcing these into a register normally takes two
8680 instructions. */
8681 x = force_operand (x, NULL_RTX);
8682 else
8683 {
8684 /* For the remaining cases, force the constant into a register. */
8685 xop1 = force_reg (SImode, xop1);
8686 x = gen_rtx_PLUS (SImode, xop0, xop1);
8687 }
8688 }
8689 else if (GET_CODE (x) == PLUS
8690 && s_register_operand (XEXP (x, 1), SImode)
8691 && !s_register_operand (XEXP (x, 0), SImode))
8692 {
8693 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8694
8695 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8696 }
8697
8698 if (flag_pic)
8699 {
8700 /* We need to find and carefully transform any SYMBOL and LABEL
8701 references, so go back to the original address expression. */
8702 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8703
8704 if (new_x != orig_x)
8705 x = new_x;
8706 }
8707
8708 return x;
8709 }
8710
8711 /* Return TRUE if X contains any TLS symbol references. */
8712
8713 bool
8714 arm_tls_referenced_p (rtx x)
8715 {
8716 if (! TARGET_HAVE_TLS)
8717 return false;
8718
8719 subrtx_iterator::array_type array;
8720 FOR_EACH_SUBRTX (iter, array, x, ALL)
8721 {
8722 const_rtx x = *iter;
8723 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8724 {
8725 /* ARM currently does not provide relocations to encode TLS variables
8726 into AArch32 instructions, only data, so there is no way to
8727 implement these when the literal pool is disabled. */
8728 if (arm_disable_literal_pool)
8729 sorry ("accessing thread-local storage is not currently supported "
8730 "with -mpure-code or -mslow-flash-data");
8731
8732 return true;
8733 }
8734
8735 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8736 TLS offsets, not real symbol references. */
8737 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8738 iter.skip_subrtxes ();
8739 }
8740 return false;
8741 }
8742
8743 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8744
8745 On the ARM, allow any integer (invalid ones are removed later by insn
8746 patterns), nice doubles, and symbol_refs that refer to the function's
8747 constant pool XXX.
8748
8749 When generating PIC, allow anything. */
8750
8751 static bool
8752 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8753 {
8754 return flag_pic || !label_mentioned_p (x);
8755 }
8756
8757 static bool
8758 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8759 {
8760 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8761 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8762 for ARMv8-M Baseline or later, the result is valid. */
8763 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8764 x = XEXP (x, 0);
8765
8766 return (CONST_INT_P (x)
8767 || CONST_DOUBLE_P (x)
8768 || CONSTANT_ADDRESS_P (x)
8769 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8770 || flag_pic);
8771 }
8772
8773 static bool
8774 arm_legitimate_constant_p (machine_mode mode, rtx x)
8775 {
8776 return (!arm_cannot_force_const_mem (mode, x)
8777 && (TARGET_32BIT
8778 ? arm_legitimate_constant_p_1 (mode, x)
8779 : thumb_legitimate_constant_p (mode, x)));
8780 }
8781
8782 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8783
8784 static bool
8785 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8786 {
8787 rtx base, offset;
8788
8789 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8790 {
8791 split_const (x, &base, &offset);
8792 if (GET_CODE (base) == SYMBOL_REF
8793 && !offset_within_block_p (base, INTVAL (offset)))
8794 return true;
8795 }
8796 return arm_tls_referenced_p (x);
8797 }
8798 \f
8799 #define REG_OR_SUBREG_REG(X) \
8800 (REG_P (X) \
8801 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8802
8803 #define REG_OR_SUBREG_RTX(X) \
8804 (REG_P (X) ? (X) : SUBREG_REG (X))
8805
8806 static inline int
8807 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8808 {
8809 machine_mode mode = GET_MODE (x);
8810 int total, words;
8811
8812 switch (code)
8813 {
8814 case ASHIFT:
8815 case ASHIFTRT:
8816 case LSHIFTRT:
8817 case ROTATERT:
8818 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8819
8820 case PLUS:
8821 case MINUS:
8822 case COMPARE:
8823 case NEG:
8824 case NOT:
8825 return COSTS_N_INSNS (1);
8826
8827 case MULT:
8828 if (arm_arch6m && arm_m_profile_small_mul)
8829 return COSTS_N_INSNS (32);
8830
8831 if (CONST_INT_P (XEXP (x, 1)))
8832 {
8833 int cycles = 0;
8834 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8835
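/* The loop below charges one cycle per two bits of the constant;
for example, 0x55 needs four iterations, giving COSTS_N_INSNS (2) + 4. */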
8836 while (i)
8837 {
8838 i >>= 2;
8839 cycles++;
8840 }
8841 return COSTS_N_INSNS (2) + cycles;
8842 }
8843 return COSTS_N_INSNS (1) + 16;
8844
8845 case SET:
8846 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8847 the mode. */
8848 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8849 return (COSTS_N_INSNS (words)
8850 + 4 * ((MEM_P (SET_SRC (x)))
8851 + MEM_P (SET_DEST (x))));
8852
8853 case CONST_INT:
8854 if (outer == SET)
8855 {
8856 if (UINTVAL (x) < 256
8857 /* 16-bit constant. */
8858 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8859 return 0;
8860 if (thumb_shiftable_const (INTVAL (x)))
8861 return COSTS_N_INSNS (2);
8862 return COSTS_N_INSNS (3);
8863 }
8864 else if ((outer == PLUS || outer == COMPARE)
8865 && INTVAL (x) < 256 && INTVAL (x) > -256)
8866 return 0;
8867 else if ((outer == IOR || outer == XOR || outer == AND)
8868 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8869 return COSTS_N_INSNS (1);
8870 else if (outer == AND)
8871 {
8872 int i;
8873 /* This duplicates the tests in the andsi3 expander. */
8874 for (i = 9; i <= 31; i++)
8875 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8876 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8877 return COSTS_N_INSNS (2);
8878 }
8879 else if (outer == ASHIFT || outer == ASHIFTRT
8880 || outer == LSHIFTRT)
8881 return 0;
8882 return COSTS_N_INSNS (2);
8883
8884 case CONST:
8885 case CONST_DOUBLE:
8886 case LABEL_REF:
8887 case SYMBOL_REF:
8888 return COSTS_N_INSNS (3);
8889
8890 case UDIV:
8891 case UMOD:
8892 case DIV:
8893 case MOD:
8894 return 100;
8895
8896 case TRUNCATE:
8897 return 99;
8898
8899 case AND:
8900 case XOR:
8901 case IOR:
8902 /* XXX guess. */
8903 return 8;
8904
8905 case MEM:
8906 /* XXX another guess. */
8907 /* Memory costs quite a lot for the first word, but subsequent words
8908 load at the equivalent of a single insn each. */
8909 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8910 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8911 ? 4 : 0));
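/* For instance, a DImode load (UNITS_PER_WORD is 4 here) costs
10 + 4 * ((8 - 1) / 4) == 14, plus a further 4 if it comes from the
constant pool. */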
8912
8913 case IF_THEN_ELSE:
8914 /* XXX a guess. */
8915 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8916 return 14;
8917 return 2;
8918
8919 case SIGN_EXTEND:
8920 case ZERO_EXTEND:
8921 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8922 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8923
8924 if (mode == SImode)
8925 return total;
8926
8927 if (arm_arch6)
8928 return total + COSTS_N_INSNS (1);
8929
8930 /* Assume a two-shift sequence. Increase the cost slightly so
8931 we prefer actual shifts over an extend operation. */
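/* The two-shift fallback is, for example, LSL #24 followed by ASR #24
(or LSR #24 for a zero extension) of a QImode value. */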
8932 return total + 1 + COSTS_N_INSNS (2);
8933
8934 default:
8935 return 99;
8936 }
8937 }
8938
8939 /* Estimate the size cost of thumb1 instructions.
8940 For now most of the code is copied from thumb1_rtx_costs; we need more
8941 fine-grained tuning when we have more related test cases. */
8942 static inline int
8943 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8944 {
8945 machine_mode mode = GET_MODE (x);
8946 int words, cost;
8947
8948 switch (code)
8949 {
8950 case ASHIFT:
8951 case ASHIFTRT:
8952 case LSHIFTRT:
8953 case ROTATERT:
8954 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8955
8956 case PLUS:
8957 case MINUS:
8958 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8959 patterns generated by RTL expansion, especially when expanding
8960 multiplication. */
8961 if ((GET_CODE (XEXP (x, 0)) == MULT
8962 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8963 || (GET_CODE (XEXP (x, 1)) == MULT
8964 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8965 return COSTS_N_INSNS (2);
8966 /* Fall through. */
8967 case COMPARE:
8968 case NEG:
8969 case NOT:
8970 return COSTS_N_INSNS (1);
8971
8972 case MULT:
8973 if (CONST_INT_P (XEXP (x, 1)))
8974 {
8975 /* The Thumb1 mul instruction can't operate on a constant; we must
8976 load it into a register first. */
8977 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8978 /* For targets that have a very small, high-latency multiply unit,
8979 we prefer to synthesize the multiplication with up to 5 instructions,
8980 giving a good balance between size and performance. */
8981 if (arm_arch6m && arm_m_profile_small_mul)
8982 return COSTS_N_INSNS (5);
8983 else
8984 return COSTS_N_INSNS (1) + const_size;
8985 }
8986 return COSTS_N_INSNS (1);
8987
8988 case SET:
8989 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8990 the mode. */
8991 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8992 cost = COSTS_N_INSNS (words);
8993 if (satisfies_constraint_J (SET_SRC (x))
8994 || satisfies_constraint_K (SET_SRC (x))
8995 /* Too big an immediate for a 2-byte mov; MOVT is needed. */
8996 || (CONST_INT_P (SET_SRC (x))
8997 && UINTVAL (SET_SRC (x)) >= 256
8998 && TARGET_HAVE_MOVT
8999 && satisfies_constraint_j (SET_SRC (x)))
9000 /* thumb1_movdi_insn. */
9001 || ((words > 1) && MEM_P (SET_SRC (x))))
9002 cost += COSTS_N_INSNS (1);
9003 return cost;
9004
9005 case CONST_INT:
9006 if (outer == SET)
9007 {
9008 if (UINTVAL (x) < 256)
9009 return COSTS_N_INSNS (1);
9010 /* movw is 4 bytes long. */
9011 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9014 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9015 return COSTS_N_INSNS (2);
9016 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9017 if (thumb_shiftable_const (INTVAL (x)))
9018 return COSTS_N_INSNS (2);
9019 return COSTS_N_INSNS (3);
9020 }
9021 else if ((outer == PLUS || outer == COMPARE)
9022 && INTVAL (x) < 256 && INTVAL (x) > -256)
9023 return 0;
9024 else if ((outer == IOR || outer == XOR || outer == AND)
9025 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9026 return COSTS_N_INSNS (1);
9027 else if (outer == AND)
9028 {
9029 int i;
9030 /* This duplicates the tests in the andsi3 expander. */
9031 for (i = 9; i <= 31; i++)
9032 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9033 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9034 return COSTS_N_INSNS (2);
9035 }
9036 else if (outer == ASHIFT || outer == ASHIFTRT
9037 || outer == LSHIFTRT)
9038 return 0;
9039 return COSTS_N_INSNS (2);
9040
9041 case CONST:
9042 case CONST_DOUBLE:
9043 case LABEL_REF:
9044 case SYMBOL_REF:
9045 return COSTS_N_INSNS (3);
9046
9047 case UDIV:
9048 case UMOD:
9049 case DIV:
9050 case MOD:
9051 return 100;
9052
9053 case TRUNCATE:
9054 return 99;
9055
9056 case AND:
9057 case XOR:
9058 case IOR:
9059 return COSTS_N_INSNS (1);
9060
9061 case MEM:
9062 return (COSTS_N_INSNS (1)
9063 + COSTS_N_INSNS (1)
9064 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9065 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9066 ? COSTS_N_INSNS (1) : 0));
9067
9068 case IF_THEN_ELSE:
9069 /* XXX a guess. */
9070 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9071 return 14;
9072 return 2;
9073
9074 case ZERO_EXTEND:
9075 /* XXX still guessing. */
9076 switch (GET_MODE (XEXP (x, 0)))
9077 {
9078 case E_QImode:
9079 return (1 + (mode == DImode ? 4 : 0)
9080 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9081
9082 case E_HImode:
9083 return (4 + (mode == DImode ? 4 : 0)
9084 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9085
9086 case E_SImode:
9087 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9088
9089 default:
9090 return 99;
9091 }
9092
9093 default:
9094 return 99;
9095 }
9096 }
9097
9098 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9099 operand, then return the operand that is being shifted. If the shift
9100 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
9101 Return NULL if OP is not a shifter operand. */
9102 static rtx
9103 shifter_op_p (rtx op, rtx *shift_reg)
9104 {
9105 enum rtx_code code = GET_CODE (op);
9106
9107 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9108 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9109 return XEXP (op, 0);
9110 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9111 return XEXP (op, 0);
9112 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9113 || code == ASHIFTRT)
9114 {
9115 if (!CONST_INT_P (XEXP (op, 1)))
9116 *shift_reg = XEXP (op, 1);
9117 return XEXP (op, 0);
9118 }
9119
9120 return NULL;
9121 }
9122
9123 static bool
9124 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9125 {
9126 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9127 rtx_code code = GET_CODE (x);
9128 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9129
9130 switch (XINT (x, 1))
9131 {
9132 case UNSPEC_UNALIGNED_LOAD:
9133 /* We can only do unaligned loads into the integer unit, and we can't
9134 use LDM or LDRD. */
9135 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9136 if (speed_p)
9137 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9138 + extra_cost->ldst.load_unaligned);
9139
9140 #ifdef NOT_YET
9141 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9142 ADDR_SPACE_GENERIC, speed_p);
9143 #endif
9144 return true;
9145
9146 case UNSPEC_UNALIGNED_STORE:
9147 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9148 if (speed_p)
9149 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9150 + extra_cost->ldst.store_unaligned);
9151
9152 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9153 #ifdef NOT_YET
9154 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9155 ADDR_SPACE_GENERIC, speed_p);
9156 #endif
9157 return true;
9158
9159 case UNSPEC_VRINTZ:
9160 case UNSPEC_VRINTP:
9161 case UNSPEC_VRINTM:
9162 case UNSPEC_VRINTR:
9163 case UNSPEC_VRINTX:
9164 case UNSPEC_VRINTA:
9165 if (speed_p)
9166 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9167
9168 return true;
9169 default:
9170 *cost = COSTS_N_INSNS (2);
9171 break;
9172 }
9173 return true;
9174 }
9175
9176 /* Cost of a libcall. We assume one insn per argument, an amount for the
9177 call (one insn for -Os) and then one for processing the result. */
9178 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
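/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing
for speed and COSTS_N_INSNS (4) when optimizing for size. */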
9179
9180 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9181 do \
9182 { \
9183 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9184 if (shift_op != NULL \
9185 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9186 { \
9187 if (shift_reg) \
9188 { \
9189 if (speed_p) \
9190 *cost += extra_cost->alu.arith_shift_reg; \
9191 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9192 ASHIFT, 1, speed_p); \
9193 } \
9194 else if (speed_p) \
9195 *cost += extra_cost->alu.arith_shift; \
9196 \
9197 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9198 ASHIFT, 0, speed_p) \
9199 + rtx_cost (XEXP (x, 1 - IDX), \
9200 GET_MODE (shift_op), \
9201 OP, 1, speed_p)); \
9202 return true; \
9203 } \
9204 } \
9205 while (0);
9206
9207 /* RTX costs. Make an estimate of the cost of executing the operation
9208 X, which is contained within an operation with code OUTER_CODE.
9209 SPEED_P indicates whether the cost desired is the performance cost,
9210 or the size cost. The estimate is stored in COST and the return
9211 value is TRUE if the cost calculation is final, or FALSE if the
9212 caller should recurse through the operands of X to add additional
9213 costs.
9214
9215 We currently make no attempt to model the size savings of Thumb-2
9216 16-bit instructions. At the normal points in compilation where
9217 this code is called we have no measure of whether the condition
9218 flags are live or not, and thus no realistic way to determine what
9219 the size will eventually be. */
9220 static bool
9221 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9222 const struct cpu_cost_table *extra_cost,
9223 int *cost, bool speed_p)
9224 {
9225 machine_mode mode = GET_MODE (x);
9226
9227 *cost = COSTS_N_INSNS (1);
9228
9229 if (TARGET_THUMB1)
9230 {
9231 if (speed_p)
9232 *cost = thumb1_rtx_costs (x, code, outer_code);
9233 else
9234 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9235 return true;
9236 }
9237
9238 switch (code)
9239 {
9240 case SET:
9241 *cost = 0;
9242 /* SET RTXs don't have a mode so we get it from the destination. */
9243 mode = GET_MODE (SET_DEST (x));
9244
9245 if (REG_P (SET_SRC (x))
9246 && REG_P (SET_DEST (x)))
9247 {
9248 /* Assume that most copies can be done with a single insn,
9249 unless we don't have HW FP, in which case everything
9250 larger than word mode will require two insns. */
9251 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9252 && GET_MODE_SIZE (mode) > 4)
9253 || mode == DImode)
9254 ? 2 : 1);
9255 /* Conditional register moves can be encoded
9256 in 16 bits in Thumb mode. */
9257 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9258 *cost >>= 1;
9259
9260 return true;
9261 }
9262
9263 if (CONST_INT_P (SET_SRC (x)))
9264 {
9265 /* Handle CONST_INT here, since the value doesn't have a mode
9266 and we would otherwise be unable to work out the true cost. */
9267 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9268 0, speed_p);
9269 outer_code = SET;
9270 /* Slightly lower the cost of setting a core reg to a constant.
9271 This helps break up chains and allows for better scheduling. */
9272 if (REG_P (SET_DEST (x))
9273 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9274 *cost -= 1;
9275 x = SET_SRC (x);
9276 /* Immediate moves with an immediate in the range [0, 255] can be
9277 encoded in 16 bits in Thumb mode. */
9278 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9279 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9280 *cost >>= 1;
9281 goto const_int_cost;
9282 }
9283
9284 return false;
9285
9286 case MEM:
9287 /* A memory access costs 1 insn if the mode is small or the address is
9288 a single register; otherwise it costs one insn per word. */
9289 if (REG_P (XEXP (x, 0)))
9290 *cost = COSTS_N_INSNS (1);
9291 else if (flag_pic
9292 && GET_CODE (XEXP (x, 0)) == PLUS
9293 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9294 /* This will be split into two instructions.
9295 See arm.md:calculate_pic_address. */
9296 *cost = COSTS_N_INSNS (2);
9297 else
9298 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9299
9300 /* For speed optimizations, add the costs of the address and
9301 accessing memory. */
9302 if (speed_p)
9303 #ifdef NOT_YET
9304 *cost += (extra_cost->ldst.load
9305 + arm_address_cost (XEXP (x, 0), mode,
9306 ADDR_SPACE_GENERIC, speed_p));
9307 #else
9308 *cost += extra_cost->ldst.load;
9309 #endif
9310 return true;
9311
9312 case PARALLEL:
9313 {
9314 /* Calculations of LDM costs are complex. We assume an initial cost
9315 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9316 registers; each additional group of ldm_regs_per_insn_subsequent
9317 registers then costs one more insn. The
9318 formula for N regs is thus:
9319
9320 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9321 + ldm_regs_per_insn_subsequent - 1)
9322 / ldm_regs_per_insn_subsequent).
9323
9324 Additional costs may also be added for addressing. A similar
9325 formula is used for STM. */
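/* As a worked example with hypothetical tuning values
ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
an LDM of 5 registers costs
ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2) == ldm_1st + COSTS_N_INSNS (2). */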
9326
9327 bool is_ldm = load_multiple_operation (x, SImode);
9328 bool is_stm = store_multiple_operation (x, SImode);
9329
9330 if (is_ldm || is_stm)
9331 {
9332 if (speed_p)
9333 {
9334 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9335 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9336 ? extra_cost->ldst.ldm_regs_per_insn_1st
9337 : extra_cost->ldst.stm_regs_per_insn_1st;
9338 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9339 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9340 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9341
9342 *cost += regs_per_insn_1st
9343 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9344 + regs_per_insn_sub - 1)
9345 / regs_per_insn_sub);
9346 return true;
9347 }
9348
9349 }
9350 return false;
9351 }
9352 case DIV:
9353 case UDIV:
9354 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9355 && (mode == SFmode || !TARGET_VFP_SINGLE))
9356 *cost += COSTS_N_INSNS (speed_p
9357 ? extra_cost->fp[mode != SFmode].div : 0);
9358 else if (mode == SImode && TARGET_IDIV)
9359 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9360 else
9361 *cost = LIBCALL_COST (2);
9362
9363 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9364 are possible, udiv is preferred. */
9365 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9366 return false; /* All arguments must be in registers. */
9367
9368 case MOD:
9369 /* MOD by a power of 2 can be expanded as:
9370 rsbs r1, r0, #0
9371 and r0, r0, #(n - 1)
9372 and r1, r1, #(n - 1)
9373 rsbpl r0, r1, #0. */
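/* That four-insn sequence is why the code below adds COSTS_N_INSNS (3)
to the base cost of one insn. */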
9374 if (CONST_INT_P (XEXP (x, 1))
9375 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9376 && mode == SImode)
9377 {
9378 *cost += COSTS_N_INSNS (3);
9379
9380 if (speed_p)
9381 *cost += 2 * extra_cost->alu.logical
9382 + extra_cost->alu.arith;
9383 return true;
9384 }
9385
9386 /* Fall-through. */
9387 case UMOD:
9388 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9389 are possible, udiv is preferred. */
9390 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9391 return false; /* All arguments must be in registers. */
9392
9393 case ROTATE:
9394 if (mode == SImode && REG_P (XEXP (x, 1)))
9395 {
9396 *cost += (COSTS_N_INSNS (1)
9397 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9398 if (speed_p)
9399 *cost += extra_cost->alu.shift_reg;
9400 return true;
9401 }
9402 /* Fall through */
9403 case ROTATERT:
9404 case ASHIFT:
9405 case LSHIFTRT:
9406 case ASHIFTRT:
9407 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9408 {
9409 *cost += (COSTS_N_INSNS (2)
9410 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9411 if (speed_p)
9412 *cost += 2 * extra_cost->alu.shift;
9413 return true;
9414 }
9415 else if (mode == SImode)
9416 {
9417 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9418 /* Slightly disparage register shifts at -Os, but not by much. */
9419 if (!CONST_INT_P (XEXP (x, 1)))
9420 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9421 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9422 return true;
9423 }
9424 else if (GET_MODE_CLASS (mode) == MODE_INT
9425 && GET_MODE_SIZE (mode) < 4)
9426 {
9427 if (code == ASHIFT)
9428 {
9429 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9430 /* Slightly disparage register shifts at -Os, but not by
9431 much. */
9432 if (!CONST_INT_P (XEXP (x, 1)))
9433 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9434 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9435 }
9436 else if (code == LSHIFTRT || code == ASHIFTRT)
9437 {
9438 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9439 {
9440 /* Can use SBFX/UBFX. */
9441 if (speed_p)
9442 *cost += extra_cost->alu.bfx;
9443 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9444 }
9445 else
9446 {
9447 *cost += COSTS_N_INSNS (1);
9448 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9449 if (speed_p)
9450 {
9451 if (CONST_INT_P (XEXP (x, 1)))
9452 *cost += 2 * extra_cost->alu.shift;
9453 else
9454 *cost += (extra_cost->alu.shift
9455 + extra_cost->alu.shift_reg);
9456 }
9457 else
9458 /* Slightly disparage register shifts. */
9459 *cost += !CONST_INT_P (XEXP (x, 1));
9460 }
9461 }
9462 else /* Rotates. */
9463 {
9464 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9465 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9466 if (speed_p)
9467 {
9468 if (CONST_INT_P (XEXP (x, 1)))
9469 *cost += (2 * extra_cost->alu.shift
9470 + extra_cost->alu.log_shift);
9471 else
9472 *cost += (extra_cost->alu.shift
9473 + extra_cost->alu.shift_reg
9474 + extra_cost->alu.log_shift_reg);
9475 }
9476 }
9477 return true;
9478 }
9479
9480 *cost = LIBCALL_COST (2);
9481 return false;
9482
9483 case BSWAP:
9484 if (arm_arch6)
9485 {
9486 if (mode == SImode)
9487 {
9488 if (speed_p)
9489 *cost += extra_cost->alu.rev;
9490
9491 return false;
9492 }
9493 }
9494 else
9495 {
9496 /* No rev instruction available. Look at arm_legacy_rev
9497 and thumb_legacy_rev for the form of RTL used then. */
9498 if (TARGET_THUMB)
9499 {
9500 *cost += COSTS_N_INSNS (9);
9501
9502 if (speed_p)
9503 {
9504 *cost += 6 * extra_cost->alu.shift;
9505 *cost += 3 * extra_cost->alu.logical;
9506 }
9507 }
9508 else
9509 {
9510 *cost += COSTS_N_INSNS (4);
9511
9512 if (speed_p)
9513 {
9514 *cost += 2 * extra_cost->alu.shift;
9515 *cost += extra_cost->alu.arith_shift;
9516 *cost += 2 * extra_cost->alu.logical;
9517 }
9518 }
9519 return true;
9520 }
9521 return false;
9522
9523 case MINUS:
9524 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9525 && (mode == SFmode || !TARGET_VFP_SINGLE))
9526 {
9527 if (GET_CODE (XEXP (x, 0)) == MULT
9528 || GET_CODE (XEXP (x, 1)) == MULT)
9529 {
9530 rtx mul_op0, mul_op1, sub_op;
9531
9532 if (speed_p)
9533 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9534
9535 if (GET_CODE (XEXP (x, 0)) == MULT)
9536 {
9537 mul_op0 = XEXP (XEXP (x, 0), 0);
9538 mul_op1 = XEXP (XEXP (x, 0), 1);
9539 sub_op = XEXP (x, 1);
9540 }
9541 else
9542 {
9543 mul_op0 = XEXP (XEXP (x, 1), 0);
9544 mul_op1 = XEXP (XEXP (x, 1), 1);
9545 sub_op = XEXP (x, 0);
9546 }
9547
9548 /* The first operand of the multiply may be optionally
9549 negated. */
9550 if (GET_CODE (mul_op0) == NEG)
9551 mul_op0 = XEXP (mul_op0, 0);
9552
9553 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9554 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9555 + rtx_cost (sub_op, mode, code, 0, speed_p));
9556
9557 return true;
9558 }
9559
9560 if (speed_p)
9561 *cost += extra_cost->fp[mode != SFmode].addsub;
9562 return false;
9563 }
9564
9565 if (mode == SImode)
9566 {
9567 rtx shift_by_reg = NULL;
9568 rtx shift_op;
9569 rtx non_shift_op;
9570
9571 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9572 if (shift_op == NULL)
9573 {
9574 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9575 non_shift_op = XEXP (x, 0);
9576 }
9577 else
9578 non_shift_op = XEXP (x, 1);
9579
9580 if (shift_op != NULL)
9581 {
9582 if (shift_by_reg != NULL)
9583 {
9584 if (speed_p)
9585 *cost += extra_cost->alu.arith_shift_reg;
9586 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9587 }
9588 else if (speed_p)
9589 *cost += extra_cost->alu.arith_shift;
9590
9591 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9592 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9593 return true;
9594 }
9595
9596 if (arm_arch_thumb2
9597 && GET_CODE (XEXP (x, 1)) == MULT)
9598 {
9599 /* MLS. */
9600 if (speed_p)
9601 *cost += extra_cost->mult[0].add;
9602 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9603 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9604 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9605 return true;
9606 }
9607
9608 if (CONST_INT_P (XEXP (x, 0)))
9609 {
9610 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9611 INTVAL (XEXP (x, 0)), NULL_RTX,
9612 NULL_RTX, 1, 0);
9613 *cost = COSTS_N_INSNS (insns);
9614 if (speed_p)
9615 *cost += insns * extra_cost->alu.arith;
9616 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9617 return true;
9618 }
9619 else if (speed_p)
9620 *cost += extra_cost->alu.arith;
9621
9622 return false;
9623 }
9624
9625 if (GET_MODE_CLASS (mode) == MODE_INT
9626 && GET_MODE_SIZE (mode) < 4)
9627 {
9628 rtx shift_op, shift_reg;
9629 shift_reg = NULL;
9630
9631 /* We check both sides of the MINUS for shifter operands since,
9632 unlike PLUS, it's not commutative. */
9633
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9635 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9636
9637 /* Slightly disparage, as we might need to widen the result. */
9638 *cost += 1;
9639 if (speed_p)
9640 *cost += extra_cost->alu.arith;
9641
9642 if (CONST_INT_P (XEXP (x, 0)))
9643 {
9644 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9645 return true;
9646 }
9647
9648 return false;
9649 }
9650
9651 if (mode == DImode)
9652 {
9653 *cost += COSTS_N_INSNS (1);
9654
9655 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9656 {
9657 rtx op1 = XEXP (x, 1);
9658
9659 if (speed_p)
9660 *cost += 2 * extra_cost->alu.arith;
9661
9662 if (GET_CODE (op1) == ZERO_EXTEND)
9663 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9664 0, speed_p);
9665 else
9666 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9667 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9668 0, speed_p);
9669 return true;
9670 }
9671 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9672 {
9673 if (speed_p)
9674 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9675 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9676 0, speed_p)
9677 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9678 return true;
9679 }
9680 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9681 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9682 {
9683 if (speed_p)
9684 *cost += (extra_cost->alu.arith
9685 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9686 ? extra_cost->alu.arith
9687 : extra_cost->alu.arith_shift));
9688 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9689 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9690 GET_CODE (XEXP (x, 1)), 0, speed_p));
9691 return true;
9692 }
9693
9694 if (speed_p)
9695 *cost += 2 * extra_cost->alu.arith;
9696 return false;
9697 }
9698
9699 /* Vector mode? */
9700
9701 *cost = LIBCALL_COST (2);
9702 return false;
9703
9704 case PLUS:
9705 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9706 && (mode == SFmode || !TARGET_VFP_SINGLE))
9707 {
9708 if (GET_CODE (XEXP (x, 0)) == MULT)
9709 {
9710 rtx mul_op0, mul_op1, add_op;
9711
9712 if (speed_p)
9713 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9714
9715 mul_op0 = XEXP (XEXP (x, 0), 0);
9716 mul_op1 = XEXP (XEXP (x, 0), 1);
9717 add_op = XEXP (x, 1);
9718
9719 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9720 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9721 + rtx_cost (add_op, mode, code, 0, speed_p));
9722
9723 return true;
9724 }
9725
9726 if (speed_p)
9727 *cost += extra_cost->fp[mode != SFmode].addsub;
9728 return false;
9729 }
9730 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9731 {
9732 *cost = LIBCALL_COST (2);
9733 return false;
9734 }
9735
9736 /* Narrow modes can be synthesized in SImode, but the range
9737 of useful sub-operations is limited. Check for shift operations
9738 on one of the operands. Only left shifts can be used in the
9739 narrow modes. */
9740 if (GET_MODE_CLASS (mode) == MODE_INT
9741 && GET_MODE_SIZE (mode) < 4)
9742 {
9743 rtx shift_op, shift_reg;
9744 shift_reg = NULL;
9745
9746 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9747
9748 if (CONST_INT_P (XEXP (x, 1)))
9749 {
9750 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9751 INTVAL (XEXP (x, 1)), NULL_RTX,
9752 NULL_RTX, 1, 0);
9753 *cost = COSTS_N_INSNS (insns);
9754 if (speed_p)
9755 *cost += insns * extra_cost->alu.arith;
9756 /* Slightly penalize a narrow operation as the result may
9757 need widening. */
9758 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9759 return true;
9760 }
9761
9762 /* Slightly penalize a narrow operation as the result may
9763 need widening. */
9764 *cost += 1;
9765 if (speed_p)
9766 *cost += extra_cost->alu.arith;
9767
9768 return false;
9769 }
9770
9771 if (mode == SImode)
9772 {
9773 rtx shift_op, shift_reg;
9774
9775 if (TARGET_INT_SIMD
9776 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9778 {
9779 /* UXTA[BH] or SXTA[BH]. */
9780 if (speed_p)
9781 *cost += extra_cost->alu.extend_arith;
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9783 0, speed_p)
9784 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9785 return true;
9786 }
9787
9788 shift_reg = NULL;
9789 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9790 if (shift_op != NULL)
9791 {
9792 if (shift_reg)
9793 {
9794 if (speed_p)
9795 *cost += extra_cost->alu.arith_shift_reg;
9796 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9797 }
9798 else if (speed_p)
9799 *cost += extra_cost->alu.arith_shift;
9800
9801 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9802 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9803 return true;
9804 }
9805 if (GET_CODE (XEXP (x, 0)) == MULT)
9806 {
9807 rtx mul_op = XEXP (x, 0);
9808
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9822 == 16))))))
9823 {
9824 /* SMLA[BT][BT]. */
9825 if (speed_p)
9826 *cost += extra_cost->mult[0].extend_add;
9827 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9828 SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9832 return true;
9833 }
9834
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].add;
9837 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9838 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9839 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9840 return true;
9841 }
9842 if (CONST_INT_P (XEXP (x, 1)))
9843 {
9844 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9845 INTVAL (XEXP (x, 1)), NULL_RTX,
9846 NULL_RTX, 1, 0);
9847 *cost = COSTS_N_INSNS (insns);
9848 if (speed_p)
9849 *cost += insns * extra_cost->alu.arith;
9850 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9851 return true;
9852 }
9853 else if (speed_p)
9854 *cost += extra_cost->alu.arith;
9855
9856 return false;
9857 }
9858
9859 if (mode == DImode)
9860 {
9861 if (arm_arch3m
9862 && GET_CODE (XEXP (x, 0)) == MULT
9863 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9864 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9865 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9866 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9867 {
9868 if (speed_p)
9869 *cost += extra_cost->mult[1].extend_add;
9870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9873 ZERO_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9875 return true;
9876 }
9877
9878 *cost += COSTS_N_INSNS (1);
9879
9880 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9882 {
9883 if (speed_p)
9884 *cost += (extra_cost->alu.arith
9885 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9886 ? extra_cost->alu.arith
9887 : extra_cost->alu.arith_shift));
9888
9889 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9890 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9892 return true;
9893 }
9894
9895 if (speed_p)
9896 *cost += 2 * extra_cost->alu.arith;
9897 return false;
9898 }
9899
9900 /* Vector mode? */
9901 *cost = LIBCALL_COST (2);
9902 return false;
9903 case IOR:
9904 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9905 {
9906 if (speed_p)
9907 *cost += extra_cost->alu.rev;
9908
9909 return true;
9910 }
9911 /* Fall through. */
9912 case AND: case XOR:
9913 if (mode == SImode)
9914 {
9915 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9916 rtx op0 = XEXP (x, 0);
9917 rtx shift_op, shift_reg;
9918
9919 if (subcode == NOT
9920 && (code == AND
9921 || (code == IOR && TARGET_THUMB2)))
9922 op0 = XEXP (op0, 0);
9923
9924 shift_reg = NULL;
9925 shift_op = shifter_op_p (op0, &shift_reg);
9926 if (shift_op != NULL)
9927 {
9928 if (shift_reg)
9929 {
9930 if (speed_p)
9931 *cost += extra_cost->alu.log_shift_reg;
9932 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9933 }
9934 else if (speed_p)
9935 *cost += extra_cost->alu.log_shift;
9936
9937 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9938 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9939 return true;
9940 }
9941
9942 if (CONST_INT_P (XEXP (x, 1)))
9943 {
9944 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9945 INTVAL (XEXP (x, 1)), NULL_RTX,
9946 NULL_RTX, 1, 0);
9947
9948 *cost = COSTS_N_INSNS (insns);
9949 if (speed_p)
9950 *cost += insns * extra_cost->alu.logical;
9951 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9952 return true;
9953 }
9954
9955 if (speed_p)
9956 *cost += extra_cost->alu.logical;
9957 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9958 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9959 return true;
9960 }
9961
9962 if (mode == DImode)
9963 {
9964 rtx op0 = XEXP (x, 0);
9965 enum rtx_code subcode = GET_CODE (op0);
9966
9967 *cost += COSTS_N_INSNS (1);
9968
9969 if (subcode == NOT
9970 && (code == AND
9971 || (code == IOR && TARGET_THUMB2)))
9972 op0 = XEXP (op0, 0);
9973
9974 if (GET_CODE (op0) == ZERO_EXTEND)
9975 {
9976 if (speed_p)
9977 *cost += 2 * extra_cost->alu.logical;
9978
9979 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9980 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9982 return true;
9983 }
9984 else if (GET_CODE (op0) == SIGN_EXTEND)
9985 {
9986 if (speed_p)
9987 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9988
9989 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9990 0, speed_p)
9991 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9992 return true;
9993 }
9994
9995 if (speed_p)
9996 *cost += 2 * extra_cost->alu.logical;
9997
9998 return true;
9999 }
10000 /* Vector mode? */
10001
10002 *cost = LIBCALL_COST (2);
10003 return false;
10004
10005 case MULT:
10006 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10007 && (mode == SFmode || !TARGET_VFP_SINGLE))
10008 {
10009 rtx op0 = XEXP (x, 0);
10010
10011 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10012 op0 = XEXP (op0, 0);
10013
10014 if (speed_p)
10015 *cost += extra_cost->fp[mode != SFmode].mult;
10016
10017 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10018 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10019 return true;
10020 }
10021 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10022 {
10023 *cost = LIBCALL_COST (2);
10024 return false;
10025 }
10026
10027 if (mode == SImode)
10028 {
10029 if (TARGET_DSP_MULTIPLY
10030 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10031 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10032 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10034 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10035 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10037 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10038 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10039 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10041 && (INTVAL (XEXP (XEXP (x, 1), 1))
10042 == 16))))))
10043 {
10044 /* SMUL[TB][TB]. */
10045 if (speed_p)
10046 *cost += extra_cost->mult[0].extend;
10047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10048 SIGN_EXTEND, 0, speed_p);
10049 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10050 SIGN_EXTEND, 1, speed_p);
10051 return true;
10052 }
10053 if (speed_p)
10054 *cost += extra_cost->mult[0].simple;
10055 return false;
10056 }
10057
10058 if (mode == DImode)
10059 {
10060 if (arm_arch3m
10061 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10063 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10065 {
10066 if (speed_p)
10067 *cost += extra_cost->mult[1].extend;
10068 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10069 ZERO_EXTEND, 0, speed_p)
10070 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10071 ZERO_EXTEND, 0, speed_p));
10072 return true;
10073 }
10074
10075 *cost = LIBCALL_COST (2);
10076 return false;
10077 }
10078
10079 /* Vector mode? */
10080 *cost = LIBCALL_COST (2);
10081 return false;
10082
10083 case NEG:
10084 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10085 && (mode == SFmode || !TARGET_VFP_SINGLE))
10086 {
10087 if (GET_CODE (XEXP (x, 0)) == MULT)
10088 {
10089 /* VNMUL. */
10090 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10091 return true;
10092 }
10093
10094 if (speed_p)
10095 *cost += extra_cost->fp[mode != SFmode].neg;
10096
10097 return false;
10098 }
10099 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10100 {
10101 *cost = LIBCALL_COST (1);
10102 return false;
10103 }
10104
10105 if (mode == SImode)
10106 {
10107 if (GET_CODE (XEXP (x, 0)) == ABS)
10108 {
10109 *cost += COSTS_N_INSNS (1);
10110 /* Assume the non-flag-changing variant. */
10111 if (speed_p)
10112 *cost += (extra_cost->alu.log_shift
10113 + extra_cost->alu.arith_shift);
10114 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10115 return true;
10116 }
10117
10118 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10119 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10120 {
10121 *cost += COSTS_N_INSNS (1);
10122 /* No extra cost for MOV imm and MVN imm. */
10123 /* If the comparison op is using the flags, there's no further
10124 cost; otherwise we need to add the cost of the comparison. */
10125 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10126 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10127 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10128 {
10129 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10130 *cost += (COSTS_N_INSNS (1)
10131 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10132 0, speed_p)
10133 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10134 1, speed_p));
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 }
10138 return true;
10139 }
10140
10141 if (speed_p)
10142 *cost += extra_cost->alu.arith;
10143 return false;
10144 }
10145
10146 if (GET_MODE_CLASS (mode) == MODE_INT
10147 && GET_MODE_SIZE (mode) < 4)
10148 {
10149 /* Slightly disparage, as we might need an extend operation. */
10150 *cost += 1;
10151 if (speed_p)
10152 *cost += extra_cost->alu.arith;
10153 return false;
10154 }
10155
10156 if (mode == DImode)
10157 {
10158 *cost += COSTS_N_INSNS (1);
10159 if (speed_p)
10160 *cost += 2 * extra_cost->alu.arith;
10161 return false;
10162 }
10163
10164 /* Vector mode? */
10165 *cost = LIBCALL_COST (1);
10166 return false;
10167
10168 case NOT:
10169 if (mode == SImode)
10170 {
10171 rtx shift_op;
10172 rtx shift_reg = NULL;
10173
10174 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10175
10176 if (shift_op)
10177 {
10178 if (shift_reg != NULL)
10179 {
10180 if (speed_p)
10181 *cost += extra_cost->alu.log_shift_reg;
10182 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10183 }
10184 else if (speed_p)
10185 *cost += extra_cost->alu.log_shift;
10186 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10187 return true;
10188 }
10189
10190 if (speed_p)
10191 *cost += extra_cost->alu.logical;
10192 return false;
10193 }
10194 if (mode == DImode)
10195 {
10196 *cost += COSTS_N_INSNS (1);
10197 return false;
10198 }
10199
10200 /* Vector mode? */
10201
10202 *cost += LIBCALL_COST (1);
10203 return false;
10204
10205 case IF_THEN_ELSE:
10206 {
10207 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10208 {
10209 *cost += COSTS_N_INSNS (3);
10210 return true;
10211 }
10212 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10213 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10214
10215 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10216 /* Assume that if one arm of the if_then_else is a register,
10217 that it will be tied with the result and eliminate the
10218 conditional insn. */
10219 if (REG_P (XEXP (x, 1)))
10220 *cost += op2cost;
10221 else if (REG_P (XEXP (x, 2)))
10222 *cost += op1cost;
10223 else
10224 {
10225 if (speed_p)
10226 {
10227 if (extra_cost->alu.non_exec_costs_exec)
10228 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10229 else
10230 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10231 }
10232 else
10233 *cost += op1cost + op2cost;
10234 }
10235 }
10236 return true;
10237
10238 case COMPARE:
10239 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10240 *cost = 0;
10241 else
10242 {
10243 machine_mode op0mode;
10244 /* We'll mostly assume that the cost of a compare is the cost of the
10245 LHS. However, there are some notable exceptions. */
10246
10247 /* Floating point compares are never done as side-effects. */
10248 op0mode = GET_MODE (XEXP (x, 0));
10249 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10250 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10251 {
10252 if (speed_p)
10253 *cost += extra_cost->fp[op0mode != SFmode].compare;
10254
10255 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10256 {
10257 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10258 return true;
10259 }
10260
10261 return false;
10262 }
10263 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10264 {
10265 *cost = LIBCALL_COST (2);
10266 return false;
10267 }
10268
10269 /* DImode compares normally take two insns. */
10270 if (op0mode == DImode)
10271 {
10272 *cost += COSTS_N_INSNS (1);
10273 if (speed_p)
10274 *cost += 2 * extra_cost->alu.arith;
10275 return false;
10276 }
10277
10278 if (op0mode == SImode)
10279 {
10280 rtx shift_op;
10281 rtx shift_reg;
10282
10283 if (XEXP (x, 1) == const0_rtx
10284 && !(REG_P (XEXP (x, 0))
10285 || (GET_CODE (XEXP (x, 0)) == SUBREG
10286 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10287 {
10288 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10289
10290 /* Multiply operations that set the flags are often
10291 significantly more expensive. */
10292 if (speed_p
10293 && GET_CODE (XEXP (x, 0)) == MULT
10294 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10295 *cost += extra_cost->mult[0].flag_setting;
10296
10297 if (speed_p
10298 && GET_CODE (XEXP (x, 0)) == PLUS
10299 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10300 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10301 0), 1), mode))
10302 *cost += extra_cost->mult[0].flag_setting;
10303 return true;
10304 }
10305
10306 shift_reg = NULL;
10307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10308 if (shift_op != NULL)
10309 {
10310 if (shift_reg != NULL)
10311 {
10312 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10313 1, speed_p);
10314 if (speed_p)
10315 *cost += extra_cost->alu.arith_shift_reg;
10316 }
10317 else if (speed_p)
10318 *cost += extra_cost->alu.arith_shift;
10319 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10320 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10321 return true;
10322 }
10323
10324 if (speed_p)
10325 *cost += extra_cost->alu.arith;
10326 if (CONST_INT_P (XEXP (x, 1))
10327 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10328 {
10329 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10330 return true;
10331 }
10332 return false;
10333 }
10334
10335 /* Vector mode? */
10336
10337 *cost = LIBCALL_COST (2);
10338 return false;
10339 }
10340 return true;
10341
10342 case EQ:
10343 case NE:
10344 case LT:
10345 case LE:
10346 case GT:
10347 case GE:
10348 case LTU:
10349 case LEU:
10350 case GEU:
10351 case GTU:
10352 case ORDERED:
10353 case UNORDERED:
10354 case UNEQ:
10355 case UNLE:
10356 case UNLT:
10357 case UNGE:
10358 case UNGT:
10359 case LTGT:
10360 if (outer_code == SET)
10361 {
10362 /* Is it a store-flag operation? */
10363 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10364 && XEXP (x, 1) == const0_rtx)
10365 {
10366 /* Thumb also needs an IT insn. */
10367 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10368 return true;
10369 }
10370 if (XEXP (x, 1) == const0_rtx)
10371 {
10372 switch (code)
10373 {
10374 case LT:
10375 /* LSR Rd, Rn, #31. */
10376 if (speed_p)
10377 *cost += extra_cost->alu.shift;
10378 break;
10379
10380 case EQ:
10381 /* RSBS T1, Rn, #0
10382 ADC Rd, Rn, T1. */
10383
10384 case NE:
10385 /* SUBS T1, Rn, #1
10386 SBC Rd, Rn, T1. */
10387 *cost += COSTS_N_INSNS (1);
10388 break;
10389
10390 case LE:
10391 /* RSBS T1, Rn, Rn, LSR #31
10392 ADC Rd, Rn, T1. */
10393 *cost += COSTS_N_INSNS (1);
10394 if (speed_p)
10395 *cost += extra_cost->alu.arith_shift;
10396 break;
10397
10398 case GT:
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost += COSTS_N_INSNS (1);
10402 if (speed_p)
10403 *cost += (extra_cost->alu.arith_shift
10404 + extra_cost->alu.shift);
10405 break;
10406
10407 case GE:
10408 /* ASR Rd, Rn, #31
10409 ADD Rd, Rn, #1. */
10410 *cost += COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->alu.shift;
10413 break;
10414
10415 default:
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost = COSTS_N_INSNS (3);
10419 break;
10420 }
10421 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10422 return true;
10423 }
10424 else
10425 {
10426 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10427 if (CONST_INT_P (XEXP (x, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10429 {
10430 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10431 return true;
10432 }
10433
10434 return false;
10435 }
10436 }
10437 /* Not directly inside a set. If it involves the condition code
10438 register, it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere,
10440 this is just the control part, which has no additional
10441 cost. */
10442 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10443 && XEXP (x, 1) == const0_rtx)
10444 {
10445 *cost = 0;
10446 return true;
10447 }
10448 return false;
10449
10450 case ABS:
10451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10452 && (mode == SFmode || !TARGET_VFP_SINGLE))
10453 {
10454 if (speed_p)
10455 *cost += extra_cost->fp[mode != SFmode].neg;
10456
10457 return false;
10458 }
10459 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10460 {
10461 *cost = LIBCALL_COST (1);
10462 return false;
10463 }
10464
10465 if (mode == SImode)
10466 {
10467 if (speed_p)
10468 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10469 return false;
10470 }
10471 /* Vector mode? */
10472 *cost = LIBCALL_COST (1);
10473 return false;
10474
10475 case SIGN_EXTEND:
10476 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10477 && MEM_P (XEXP (x, 0)))
10478 {
10479 if (mode == DImode)
10480 *cost += COSTS_N_INSNS (1);
10481
10482 if (!speed_p)
10483 return true;
10484
10485 if (GET_MODE (XEXP (x, 0)) == SImode)
10486 *cost += extra_cost->ldst.load;
10487 else
10488 *cost += extra_cost->ldst.load_sign_extend;
10489
10490 if (mode == DImode)
10491 *cost += extra_cost->alu.shift;
10492
10493 return true;
10494 }
10495
10496 /* Widening from less than 32-bits requires an extend operation. */
10497 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10498 {
10499 /* We have SXTB/SXTH. */
10500 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10501 if (speed_p)
10502 *cost += extra_cost->alu.extend;
10503 }
10504 else if (GET_MODE (XEXP (x, 0)) != SImode)
10505 {
10506 /* Needs two shifts. */
10507 *cost += COSTS_N_INSNS (1);
10508 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10509 if (speed_p)
10510 *cost += 2 * extra_cost->alu.shift;
10511 }
10512
10513 /* Widening beyond 32-bits requires one more insn. */
10514 if (mode == DImode)
10515 {
10516 *cost += COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 }
10520
10521 return true;
10522
10523 case ZERO_EXTEND:
10524 if ((arm_arch4
10525 || GET_MODE (XEXP (x, 0)) == SImode
10526 || GET_MODE (XEXP (x, 0)) == QImode)
10527 && MEM_P (XEXP (x, 0)))
10528 {
10529 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10530
10531 if (mode == DImode)
10532 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10533
10534 return true;
10535 }
10536
10537 /* Widening from less than 32-bits requires an extend operation. */
10538 if (GET_MODE (XEXP (x, 0)) == QImode)
10539 {
10540 /* UXTB can be a shorter instruction in Thumb2, but it might
10541 be slower than the AND Rd, Rn, #255 alternative. When
10542 optimizing for speed it should never be slower to use
10543 AND, and we don't really model 16-bit vs 32-bit insns
10544 here. */
10545 if (speed_p)
10546 *cost += extra_cost->alu.logical;
10547 }
10548 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10549 {
10550 /* We have UXTB/UXTH. */
10551 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10552 if (speed_p)
10553 *cost += extra_cost->alu.extend;
10554 }
10555 else if (GET_MODE (XEXP (x, 0)) != SImode)
10556 {
10557 /* Needs two shifts. It's marginally preferable to use
10558 shifts rather than two BIC instructions as the second
10559 shift may merge with a subsequent insn as a shifter
10560 op. */
10561 *cost = COSTS_N_INSNS (2);
10562 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10563 if (speed_p)
10564 *cost += 2 * extra_cost->alu.shift;
10565 }
10566
10567 /* Widening beyond 32-bits requires one more insn. */
10568 if (mode == DImode)
10569 {
10570 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10571 }
10572
10573 return true;
10574
10575 case CONST_INT:
10576 *cost = 0;
10577 /* CONST_INT has no mode, so we cannot tell for sure how many
10578 insns are really going to be needed. The best we can do is
10579 look at the value passed. If it fits in SImode, then assume
10580 that's the mode it will be used for. Otherwise assume it
10581 will be used in DImode. */
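/* For instance, assuming a 64-bit HOST_WIDE_INT, the value 0x100000000
does not fit in SImode and is therefore costed below as two SImode
constants: a low half of 0 and a high half of 1. */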
10582 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10583 mode = SImode;
10584 else
10585 mode = DImode;
10586
10587 /* Avoid blowing up in arm_gen_constant (). */
10588 if (!(outer_code == PLUS
10589 || outer_code == AND
10590 || outer_code == IOR
10591 || outer_code == XOR
10592 || outer_code == MINUS))
10593 outer_code = SET;
10594
10595 const_int_cost:
10596 if (mode == SImode)
10597 {
10598 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10599 INTVAL (x), NULL, NULL,
10600 0, 0));
10601 /* Extra costs? */
10602 }
10603 else
10604 {
10605 *cost += COSTS_N_INSNS (arm_gen_constant
10606 (outer_code, SImode, NULL,
10607 trunc_int_for_mode (INTVAL (x), SImode),
10608 NULL, NULL, 0, 0)
10609 + arm_gen_constant (outer_code, SImode, NULL,
10610 INTVAL (x) >> 32, NULL,
10611 NULL, 0, 0));
10612 /* Extra costs? */
10613 }
10614
10615 return true;
10616
10617 case CONST:
10618 case LABEL_REF:
10619 case SYMBOL_REF:
10620 if (speed_p)
10621 {
10622 if (arm_arch_thumb2 && !flag_pic)
10623 *cost += COSTS_N_INSNS (1);
10624 else
10625 *cost += extra_cost->ldst.load;
10626 }
10627 else
10628 *cost += COSTS_N_INSNS (1);
10629
10630 if (flag_pic)
10631 {
10632 *cost += COSTS_N_INSNS (1);
10633 if (speed_p)
10634 *cost += extra_cost->alu.arith;
10635 }
10636
10637 return true;
10638
10639 case CONST_FIXED:
10640 *cost = COSTS_N_INSNS (4);
10641 /* Fixme. */
10642 return true;
10643
10644 case CONST_DOUBLE:
10645 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10646 && (mode == SFmode || !TARGET_VFP_SINGLE))
10647 {
10648 if (vfp3_const_double_rtx (x))
10649 {
10650 if (speed_p)
10651 *cost += extra_cost->fp[mode == DFmode].fpconst;
10652 return true;
10653 }
10654
10655 if (speed_p)
10656 {
10657 if (mode == DFmode)
10658 *cost += extra_cost->ldst.loadd;
10659 else
10660 *cost += extra_cost->ldst.loadf;
10661 }
10662 else
10663 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10664
10665 return true;
10666 }
10667 *cost = COSTS_N_INSNS (4);
10668 return true;
10669
10670 case CONST_VECTOR:
10671 /* Fixme. */
10672 if (TARGET_NEON
10673 && TARGET_HARD_FLOAT
10674 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10675 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10676 *cost = COSTS_N_INSNS (1);
10677 else
10678 *cost = COSTS_N_INSNS (4);
10679 return true;
10680
10681 case HIGH:
10682 case LO_SUM:
10683 /* When optimizing for size, we prefer constant pool entries to
10684 MOVW/MOVT pairs, so bump the cost of these slightly. */
10685 if (!speed_p)
10686 *cost += 1;
10687 return true;
10688
10689 case CLZ:
10690 if (speed_p)
10691 *cost += extra_cost->alu.clz;
10692 return false;
10693
10694 case SMIN:
10695 if (XEXP (x, 1) == const0_rtx)
10696 {
10697 if (speed_p)
10698 *cost += extra_cost->alu.log_shift;
10699 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10700 return true;
10701 }
10702 /* Fall through. */
10703 case SMAX:
10704 case UMIN:
10705 case UMAX:
10706 *cost += COSTS_N_INSNS (1);
10707 return false;
10708
10709 case TRUNCATE:
10710 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10711 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10712 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10713 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10714 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10715 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10716 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10717 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10718 == ZERO_EXTEND))))
10719 {
10720 if (speed_p)
10721 *cost += extra_cost->mult[1].extend;
10722 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10723 ZERO_EXTEND, 0, speed_p)
10724 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10725 ZERO_EXTEND, 0, speed_p));
10726 return true;
10727 }
10728 *cost = LIBCALL_COST (1);
10729 return false;
10730
10731 case UNSPEC_VOLATILE:
10732 case UNSPEC:
10733 return arm_unspec_cost (x, outer_code, speed_p, cost);
10734
10735 case PC:
10736 /* Reading the PC is like reading any other register. Writing it
10737 is more expensive, but we take that into account elsewhere. */
10738 *cost = 0;
10739 return true;
10740
10741 case ZERO_EXTRACT:
10742 /* TODO: Simple zero_extract of bottom bits using AND. */
10743 /* Fall through. */
10744 case SIGN_EXTRACT:
10745 if (arm_arch6
10746 && mode == SImode
10747 && CONST_INT_P (XEXP (x, 1))
10748 && CONST_INT_P (XEXP (x, 2)))
10749 {
10750 if (speed_p)
10751 *cost += extra_cost->alu.bfx;
10752 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10753 return true;
10754 }
10755 /* Without UBFX/SBFX, need to resort to shift operations. */
10756 *cost += COSTS_N_INSNS (1);
10757 if (speed_p)
10758 *cost += 2 * extra_cost->alu.shift;
10759 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10760 return true;
10761
10762 case FLOAT_EXTEND:
10763 if (TARGET_HARD_FLOAT)
10764 {
10765 if (speed_p)
10766 *cost += extra_cost->fp[mode == DFmode].widen;
10767 if (!TARGET_VFP5
10768 && GET_MODE (XEXP (x, 0)) == HFmode)
10769 {
10770 /* Pre v8, widening HF->DF is a two-step process, first
10771 widening to SFmode. */
10772 *cost += COSTS_N_INSNS (1);
10773 if (speed_p)
10774 *cost += extra_cost->fp[0].widen;
10775 }
10776 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10777 return true;
10778 }
10779
10780 *cost = LIBCALL_COST (1);
10781 return false;
10782
10783 case FLOAT_TRUNCATE:
10784 if (TARGET_HARD_FLOAT)
10785 {
10786 if (speed_p)
10787 *cost += extra_cost->fp[mode == DFmode].narrow;
10788 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10789 return true;
10790 /* Vector modes? */
10791 }
10792 *cost = LIBCALL_COST (1);
10793 return false;
10794
10795 case FMA:
10796 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10797 {
10798 rtx op0 = XEXP (x, 0);
10799 rtx op1 = XEXP (x, 1);
10800 rtx op2 = XEXP (x, 2);
10801
10802
10803 /* vfms or vfnma. */
10804 if (GET_CODE (op0) == NEG)
10805 op0 = XEXP (op0, 0);
10806
10807 /* vfnms or vfnma. */
10808 if (GET_CODE (op2) == NEG)
10809 op2 = XEXP (op2, 0);
10810
10811 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10812 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10813 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10814
10815 if (speed_p)
10816 *cost += extra_cost->fp[mode == DFmode].fma;
10817
10818 return true;
10819 }
10820
10821 *cost = LIBCALL_COST (3);
10822 return false;
10823
10824 case FIX:
10825 case UNSIGNED_FIX:
10826 if (TARGET_HARD_FLOAT)
10827 {
10828 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10829 a vcvt fixed-point conversion. */
10830 if (code == FIX && mode == SImode
10831 && GET_CODE (XEXP (x, 0)) == FIX
10832 && GET_MODE (XEXP (x, 0)) == SFmode
10833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10834 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10835 > 0)
10836 {
10837 if (speed_p)
10838 *cost += extra_cost->fp[0].toint;
10839
10840 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10841 code, 0, speed_p);
10842 return true;
10843 }
10844
10845 if (GET_MODE_CLASS (mode) == MODE_INT)
10846 {
10847 mode = GET_MODE (XEXP (x, 0));
10848 if (speed_p)
10849 *cost += extra_cost->fp[mode == DFmode].toint;
10850 /* Strip off the 'cost' of rounding towards zero. */
10851 if (GET_CODE (XEXP (x, 0)) == FIX)
10852 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10853 0, speed_p);
10854 else
10855 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10858 return true;
10859 }
10860 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10861 && TARGET_VFP5)
10862 {
10863 if (speed_p)
10864 *cost += extra_cost->fp[mode == DFmode].roundint;
10865 return false;
10866 }
10867 /* Vector costs? */
10868 }
10869 *cost = LIBCALL_COST (1);
10870 return false;
10871
10872 case FLOAT:
10873 case UNSIGNED_FLOAT:
10874 if (TARGET_HARD_FLOAT)
10875 {
10876 /* ??? Increase the cost to deal with transferring from CORE
10877 -> FP registers? */
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode == DFmode].fromint;
10880 return false;
10881 }
10882 *cost = LIBCALL_COST (1);
10883 return false;
10884
10885 case CALL:
10886 return true;
10887
10888 case ASM_OPERANDS:
10889 {
10890 /* Just a guess. Guess number of instructions in the asm
10891 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10892 though (see PR60663). */
10893 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10894 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10895
10896 *cost = COSTS_N_INSNS (asm_length + num_operands);
10897 return true;
10898 }
10899 default:
10900 if (mode != VOIDmode)
10901 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10902 else
10903 *cost = COSTS_N_INSNS (4); /* Who knows? */
10904 return false;
10905 }
10906 }
10907
10908 #undef HANDLE_NARROW_SHIFT_ARITH
10909
10910 /* RTX costs entry point. */
10911
10912 static bool
10913 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10914 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10915 {
10916 bool result;
10917 int code = GET_CODE (x);
10918 gcc_assert (current_tune->insn_extra_cost);
10919
10920 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10921 (enum rtx_code) outer_code,
10922 current_tune->insn_extra_cost,
10923 total, speed);
10924
10925 if (dump_file && (dump_flags & TDF_DETAILS))
10926 {
10927 print_rtl_single (dump_file, x);
10928 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10929 *total, result ? "final" : "partial");
10930 }
10931 return result;
10932 }
10933
10934 /* All address computations that can be done are free, but rtx cost returns
10935 the same for practically all of them. So we weight the different types
10936 of address here in the order (most pref first):
10937 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10938 static inline int
10939 arm_arm_address_cost (rtx x)
10940 {
10941 enum rtx_code c = GET_CODE (x);
10942
10943 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10944 return 0;
10945 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10946 return 10;
10947
10948 if (c == PLUS)
10949 {
10950 if (CONST_INT_P (XEXP (x, 1)))
10951 return 2;
10952
10953 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10954 return 3;
10955
10956 return 4;
10957 }
10958
10959 return 6;
10960 }
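/* Illustrative examples, derived directly from the cases above (they are
   not part of the original comment): a POST_INC address such as [r0], #4
   costs 0; [r0, #8] costs 2; a register plus a scaled register, e.g.
   (plus r0 (mult r1 4)), costs 3; (plus r0 r1) costs 4; a bare register
   costs 6; and a MEM, LABEL_REF or SYMBOL_REF address costs 10.  */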
10961
10962 static inline int
10963 arm_thumb_address_cost (rtx x)
10964 {
10965 enum rtx_code c = GET_CODE (x);
10966
10967 if (c == REG)
10968 return 1;
10969 if (c == PLUS
10970 && REG_P (XEXP (x, 0))
10971 && CONST_INT_P (XEXP (x, 1)))
10972 return 1;
10973
10974 return 2;
10975 }
10976
10977 static int
10978 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10979 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10980 {
10981 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10982 }
10983
10984 /* Adjust cost hook for XScale. */
10985 static bool
10986 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10987 int * cost)
10988 {
10989 /* Some true dependencies can have a higher cost depending
10990 on precisely how certain input operands are used. */
10991 if (dep_type == 0
10992 && recog_memoized (insn) >= 0
10993 && recog_memoized (dep) >= 0)
10994 {
10995 int shift_opnum = get_attr_shift (insn);
10996 enum attr_type attr_type = get_attr_type (dep);
10997
10998 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10999 operand for INSN. If we have a shifted input operand and the
11000 instruction we depend on is another ALU instruction, then we may
11001 have to account for an additional stall. */
11002 if (shift_opnum != 0
11003 && (attr_type == TYPE_ALU_SHIFT_IMM
11004 || attr_type == TYPE_ALUS_SHIFT_IMM
11005 || attr_type == TYPE_LOGIC_SHIFT_IMM
11006 || attr_type == TYPE_LOGICS_SHIFT_IMM
11007 || attr_type == TYPE_ALU_SHIFT_REG
11008 || attr_type == TYPE_ALUS_SHIFT_REG
11009 || attr_type == TYPE_LOGIC_SHIFT_REG
11010 || attr_type == TYPE_LOGICS_SHIFT_REG
11011 || attr_type == TYPE_MOV_SHIFT
11012 || attr_type == TYPE_MVN_SHIFT
11013 || attr_type == TYPE_MOV_SHIFT_REG
11014 || attr_type == TYPE_MVN_SHIFT_REG))
11015 {
11016 rtx shifted_operand;
11017 int opno;
11018
11019 /* Get the shifted operand. */
11020 extract_insn (insn);
11021 shifted_operand = recog_data.operand[shift_opnum];
11022
11023 /* Iterate over all the operands in DEP. If we write an operand
11024 that overlaps with SHIFTED_OPERAND, then we have to increase the
11025 cost of this dependency. */
11026 extract_insn (dep);
11027 preprocess_constraints (dep);
11028 for (opno = 0; opno < recog_data.n_operands; opno++)
11029 {
11030 /* We can ignore strict inputs. */
11031 if (recog_data.operand_type[opno] == OP_IN)
11032 continue;
11033
11034 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11035 shifted_operand))
11036 {
11037 *cost = 2;
11038 return false;
11039 }
11040 }
11041 }
11042 }
11043 return true;
11044 }
11045
11046 /* Adjust cost hook for Cortex A9. */
11047 static bool
11048 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11049 int * cost)
11050 {
11051 switch (dep_type)
11052 {
11053 case REG_DEP_ANTI:
11054 *cost = 0;
11055 return false;
11056
11057 case REG_DEP_TRUE:
11058 case REG_DEP_OUTPUT:
11059 if (recog_memoized (insn) >= 0
11060 && recog_memoized (dep) >= 0)
11061 {
11062 if (GET_CODE (PATTERN (insn)) == SET)
11063 {
11064 if (GET_MODE_CLASS
11065 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11066 || GET_MODE_CLASS
11067 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11068 {
11069 enum attr_type attr_type_insn = get_attr_type (insn);
11070 enum attr_type attr_type_dep = get_attr_type (dep);
11071
11072 /* By default all dependencies of the form
11073 s0 = s0 <op> s1
11074 s0 = s0 <op> s2
11075 have an extra latency of 1 cycle because
11076 of the input and output dependency in this
11077 case. However, this gets modeled as a true
11078 dependency and hence all these checks. */
11079 if (REG_P (SET_DEST (PATTERN (insn)))
11080 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11081 {
11082 /* FMACS is a special case where the dependent
11083 instruction can be issued 3 cycles before
11084 the normal latency in case of an output
11085 dependency. */
11086 if ((attr_type_insn == TYPE_FMACS
11087 || attr_type_insn == TYPE_FMACD)
11088 && (attr_type_dep == TYPE_FMACS
11089 || attr_type_dep == TYPE_FMACD))
11090 {
11091 if (dep_type == REG_DEP_OUTPUT)
11092 *cost = insn_default_latency (dep) - 3;
11093 else
11094 *cost = insn_default_latency (dep);
11095 return false;
11096 }
11097 else
11098 {
11099 if (dep_type == REG_DEP_OUTPUT)
11100 *cost = insn_default_latency (dep) + 1;
11101 else
11102 *cost = insn_default_latency (dep);
11103 }
11104 return false;
11105 }
11106 }
11107 }
11108 }
11109 break;
11110
11111 default:
11112 gcc_unreachable ();
11113 }
11114
11115 return true;
11116 }
11117
11118 /* Adjust cost hook for FA726TE. */
11119 static bool
11120 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11121 int * cost)
11122 {
11123 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11124 has a penalty of 3. */
11125 if (dep_type == REG_DEP_TRUE
11126 && recog_memoized (insn) >= 0
11127 && recog_memoized (dep) >= 0
11128 && get_attr_conds (dep) == CONDS_SET)
11129 {
11130 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11131 if (get_attr_conds (insn) == CONDS_USE
11132 && get_attr_type (insn) != TYPE_BRANCH)
11133 {
11134 *cost = 3;
11135 return false;
11136 }
11137
11138 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11139 || get_attr_conds (insn) == CONDS_USE)
11140 {
11141 *cost = 0;
11142 return false;
11143 }
11144 }
11145
11146 return true;
11147 }
11148
11149 /* Implement TARGET_REGISTER_MOVE_COST.
11150
11151 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11152 one that is typically more expensive than a single memory access. We set
11153 the cost to less than two memory accesses so that floating
11154 point to integer conversion does not go through memory. */
11155
11156 int
11157 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11158 reg_class_t from, reg_class_t to)
11159 {
11160 if (TARGET_32BIT)
11161 {
11162 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11163 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11164 return 15;
11165 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11166 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11167 return 4;
11168 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11169 return 20;
11170 else
11171 return 2;
11172 }
11173 else
11174 {
11175 if (from == HI_REGS || to == HI_REGS)
11176 return 4;
11177 else
11178 return 2;
11179 }
11180 }
11181
11182 /* Implement TARGET_MEMORY_MOVE_COST. */
11183
11184 int
11185 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11186 bool in ATTRIBUTE_UNUSED)
11187 {
11188 if (TARGET_32BIT)
11189 return 10;
11190 else
11191 {
11192 if (GET_MODE_SIZE (mode) < 4)
11193 return 8;
11194 else
11195 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11196 }
11197 }
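/* For illustration (not in the original source, read off the code above):
   on a 32-bit (ARM/Thumb-2) target every memory move costs 10.  On Thumb-1,
   a QImode or HImode move costs 8, while an SImode move costs 8 to or from
   LO_REGS and 16 for any other register class.  */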
11198
11199 /* Vectorizer cost model implementation. */
11200
11201 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11202 static int
11203 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11204 tree vectype,
11205 int misalign ATTRIBUTE_UNUSED)
11206 {
11207 unsigned elements;
11208
11209 switch (type_of_cost)
11210 {
11211 case scalar_stmt:
11212 return current_tune->vec_costs->scalar_stmt_cost;
11213
11214 case scalar_load:
11215 return current_tune->vec_costs->scalar_load_cost;
11216
11217 case scalar_store:
11218 return current_tune->vec_costs->scalar_store_cost;
11219
11220 case vector_stmt:
11221 return current_tune->vec_costs->vec_stmt_cost;
11222
11223 case vector_load:
11224 return current_tune->vec_costs->vec_align_load_cost;
11225
11226 case vector_store:
11227 return current_tune->vec_costs->vec_store_cost;
11228
11229 case vec_to_scalar:
11230 return current_tune->vec_costs->vec_to_scalar_cost;
11231
11232 case scalar_to_vec:
11233 return current_tune->vec_costs->scalar_to_vec_cost;
11234
11235 case unaligned_load:
11236 return current_tune->vec_costs->vec_unalign_load_cost;
11237
11238 case unaligned_store:
11239 return current_tune->vec_costs->vec_unalign_store_cost;
11240
11241 case cond_branch_taken:
11242 return current_tune->vec_costs->cond_taken_branch_cost;
11243
11244 case cond_branch_not_taken:
11245 return current_tune->vec_costs->cond_not_taken_branch_cost;
11246
11247 case vec_perm:
11248 case vec_promote_demote:
11249 return current_tune->vec_costs->vec_stmt_cost;
11250
11251 case vec_construct:
11252 elements = TYPE_VECTOR_SUBPARTS (vectype);
11253 return elements / 2 + 1;
11254
11255 default:
11256 gcc_unreachable ();
11257 }
11258 }
11259
11260 /* Implement targetm.vectorize.add_stmt_cost. */
11261
11262 static unsigned
11263 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11264 struct _stmt_vec_info *stmt_info, int misalign,
11265 enum vect_cost_model_location where)
11266 {
11267 unsigned *cost = (unsigned *) data;
11268 unsigned retval = 0;
11269
11270 if (flag_vect_cost_model)
11271 {
11272 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11273 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11274
11275 /* Statements in an inner loop relative to the loop being
11276 vectorized are weighted more heavily. The value here is
11277 arbitrary and could potentially be improved with analysis. */
11278 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11279 count *= 50; /* FIXME. */
11280
11281 retval = (unsigned) (count * stmt_cost);
11282 cost[where] += retval;
11283 }
11284
11285 return retval;
11286 }
11287
11288 /* Return true if and only if this insn can dual-issue only as older. */
11289 static bool
11290 cortexa7_older_only (rtx_insn *insn)
11291 {
11292 if (recog_memoized (insn) < 0)
11293 return false;
11294
11295 switch (get_attr_type (insn))
11296 {
11297 case TYPE_ALU_DSP_REG:
11298 case TYPE_ALU_SREG:
11299 case TYPE_ALUS_SREG:
11300 case TYPE_LOGIC_REG:
11301 case TYPE_LOGICS_REG:
11302 case TYPE_ADC_REG:
11303 case TYPE_ADCS_REG:
11304 case TYPE_ADR:
11305 case TYPE_BFM:
11306 case TYPE_REV:
11307 case TYPE_MVN_REG:
11308 case TYPE_SHIFT_IMM:
11309 case TYPE_SHIFT_REG:
11310 case TYPE_LOAD_BYTE:
11311 case TYPE_LOAD1:
11312 case TYPE_STORE1:
11313 case TYPE_FFARITHS:
11314 case TYPE_FADDS:
11315 case TYPE_FFARITHD:
11316 case TYPE_FADDD:
11317 case TYPE_FMOV:
11318 case TYPE_F_CVT:
11319 case TYPE_FCMPS:
11320 case TYPE_FCMPD:
11321 case TYPE_FCONSTS:
11322 case TYPE_FCONSTD:
11323 case TYPE_FMULS:
11324 case TYPE_FMACS:
11325 case TYPE_FMULD:
11326 case TYPE_FMACD:
11327 case TYPE_FDIVS:
11328 case TYPE_FDIVD:
11329 case TYPE_F_MRC:
11330 case TYPE_F_MRRC:
11331 case TYPE_F_FLAG:
11332 case TYPE_F_LOADS:
11333 case TYPE_F_STORES:
11334 return true;
11335 default:
11336 return false;
11337 }
11338 }
11339
11340 /* Return true if and only if this insn can dual-issue as younger. */
11341 static bool
11342 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11343 {
11344 if (recog_memoized (insn) < 0)
11345 {
11346 if (verbose > 5)
11347 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11348 return false;
11349 }
11350
11351 switch (get_attr_type (insn))
11352 {
11353 case TYPE_ALU_IMM:
11354 case TYPE_ALUS_IMM:
11355 case TYPE_LOGIC_IMM:
11356 case TYPE_LOGICS_IMM:
11357 case TYPE_EXTEND:
11358 case TYPE_MVN_IMM:
11359 case TYPE_MOV_IMM:
11360 case TYPE_MOV_REG:
11361 case TYPE_MOV_SHIFT:
11362 case TYPE_MOV_SHIFT_REG:
11363 case TYPE_BRANCH:
11364 case TYPE_CALL:
11365 return true;
11366 default:
11367 return false;
11368 }
11369 }
11370
11371
11372 /* Look for an instruction that can dual issue only as an older
11373 instruction, and move it in front of any instructions that can
11374 dual-issue as younger, while preserving the relative order of all
11375 other instructions in the ready list. This is a heuristic to help
11376 dual-issue in later cycles, by postponing issue of more flexible
11377 instructions. This heuristic may affect dual issue opportunities
11378 in the current cycle. */
11379 static void
11380 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11381 int *n_readyp, int clock)
11382 {
11383 int i;
11384 int first_older_only = -1, first_younger = -1;
11385
11386 if (verbose > 5)
11387 fprintf (file,
11388 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11389 clock,
11390 *n_readyp);
11391
11392 /* Traverse the ready list from the head (the instruction to issue
11393 first), looking for the first instruction that can issue as
11394 younger and the first instruction that can dual-issue only as
11395 older. */
11396 for (i = *n_readyp - 1; i >= 0; i--)
11397 {
11398 rtx_insn *insn = ready[i];
11399 if (cortexa7_older_only (insn))
11400 {
11401 first_older_only = i;
11402 if (verbose > 5)
11403 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11404 break;
11405 }
11406 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11407 first_younger = i;
11408 }
11409
11410 /* Nothing to reorder because either no younger insn was found, or an
11411 insn that can dual-issue only as older appears before any insn that
11412 can dual-issue as younger. */
11413 if (first_younger == -1)
11414 {
11415 if (verbose > 5)
11416 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11417 return;
11418 }
11419
11420 /* Nothing to reorder because no older-only insn in the ready list. */
11421 if (first_older_only == -1)
11422 {
11423 if (verbose > 5)
11424 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11425 return;
11426 }
11427
11428 /* Move first_older_only insn before first_younger. */
11429 if (verbose > 5)
11430 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11431 INSN_UID(ready [first_older_only]),
11432 INSN_UID(ready [first_younger]));
11433 rtx_insn *first_older_only_insn = ready [first_older_only];
11434 for (i = first_older_only; i < first_younger; i++)
11435 {
11436 ready[i] = ready[i+1];
11437 }
11438
11439 ready[i] = first_older_only_insn;
11440 return;
11441 }
11442
11443 /* Implement TARGET_SCHED_REORDER. */
11444 static int
11445 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11446 int clock)
11447 {
11448 switch (arm_tune)
11449 {
11450 case TARGET_CPU_cortexa7:
11451 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11452 break;
11453 default:
11454 /* Do nothing for other cores. */
11455 break;
11456 }
11457
11458 return arm_issue_rate ();
11459 }
11460
11461 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11462 It corrects the value of COST based on the relationship between
11463 INSN and DEP of dependence kind DEP_TYPE. It returns the new
11464 value. There is a per-core adjust_cost hook to adjust scheduler costs
11465 and the per-core hook can choose to completely override the generic
11466 adjust_cost function. Only put bits of code into arm_adjust_cost that
11467 are common across all cores. */
11468 static int
11469 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11470 unsigned int)
11471 {
11472 rtx i_pat, d_pat;
11473
11474 /* When generating Thumb-1 code, we want to place flag-setting operations
11475 close to a conditional branch which depends on them, so that we can
11476 omit the comparison. */
11477 if (TARGET_THUMB1
11478 && dep_type == 0
11479 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11480 && recog_memoized (dep) >= 0
11481 && get_attr_conds (dep) == CONDS_SET)
11482 return 0;
11483
11484 if (current_tune->sched_adjust_cost != NULL)
11485 {
11486 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11487 return cost;
11488 }
11489
11490 /* XXX Is this strictly true? */
11491 if (dep_type == REG_DEP_ANTI
11492 || dep_type == REG_DEP_OUTPUT)
11493 return 0;
11494
11495 /* Call insns don't incur a stall, even if they follow a load. */
11496 if (dep_type == 0
11497 && CALL_P (insn))
11498 return 1;
11499
11500 if ((i_pat = single_set (insn)) != NULL
11501 && MEM_P (SET_SRC (i_pat))
11502 && (d_pat = single_set (dep)) != NULL
11503 && MEM_P (SET_DEST (d_pat)))
11504 {
11505 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11506 /* This is a load after a store; there is no conflict if the load reads
11507 from a cached area. Assume that loads from the stack, and from the
11508 constant pool are cached, and that others will miss. This is a
11509 hack. */
11510
11511 if ((GET_CODE (src_mem) == SYMBOL_REF
11512 && CONSTANT_POOL_ADDRESS_P (src_mem))
11513 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11514 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11515 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11516 return 1;
11517 }
11518
11519 return cost;
11520 }
11521
11522 int
11523 arm_max_conditional_execute (void)
11524 {
11525 return max_insns_skipped;
11526 }
11527
11528 static int
11529 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11530 {
11531 if (TARGET_32BIT)
11532 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11533 else
11534 return (optimize > 0) ? 2 : 0;
11535 }
11536
11537 static int
11538 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11539 {
11540 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11541 }
11542
11543 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11544 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11545 sequences of non-executed instructions in IT blocks probably take the same
11546 amount of time as executed instructions (and the IT instruction itself takes
11547 space in icache). This function was experimentally determined to give good
11548 results on a popular embedded benchmark. */
11549
11550 static int
11551 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11552 {
11553 return (TARGET_32BIT && speed_p) ? 1
11554 : arm_default_branch_cost (speed_p, predictable_p);
11555 }
11556
11557 static int
11558 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11559 {
11560 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11561 }
11562
11563 static bool fp_consts_inited = false;
11564
11565 static REAL_VALUE_TYPE value_fp0;
11566
11567 static void
11568 init_fp_table (void)
11569 {
11570 REAL_VALUE_TYPE r;
11571
11572 r = REAL_VALUE_ATOF ("0", DFmode);
11573 value_fp0 = r;
11574 fp_consts_inited = true;
11575 }
11576
11577 /* Return TRUE if rtx X is a valid immediate FP constant. */
11578 int
11579 arm_const_double_rtx (rtx x)
11580 {
11581 const REAL_VALUE_TYPE *r;
11582
11583 if (!fp_consts_inited)
11584 init_fp_table ();
11585
11586 r = CONST_DOUBLE_REAL_VALUE (x);
11587 if (REAL_VALUE_MINUS_ZERO (*r))
11588 return 0;
11589
11590 if (real_equal (r, &value_fp0))
11591 return 1;
11592
11593 return 0;
11594 }
11595
11596 /* VFPv3 has a fairly wide range of representable immediates, formed from
11597 "quarter-precision" floating-point values. These can be evaluated using this
11598 formula (with ^ for exponentiation):
11599
11600 (-1)^s * n * 2^-r
11601
11602 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11603 16 <= n <= 31 and 0 <= r <= 7.
11604
11605 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11606
11607 - A (most-significant) is the sign bit.
11608 - BCD are the exponent (encoded as r XOR 3).
11609 - EFGH are the mantissa (encoded as n - 16).
11610 */
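/* A worked example, added for illustration and derived purely from the
   formula above: 1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4.  The encoded
   index is then A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000,
   i.e. 0x70, which is the value vfp3_const_double_index below computes
   from (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16).  */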
11611
11612 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11613 fconst[sd] instruction, or -1 if X isn't suitable. */
11614 static int
11615 vfp3_const_double_index (rtx x)
11616 {
11617 REAL_VALUE_TYPE r, m;
11618 int sign, exponent;
11619 unsigned HOST_WIDE_INT mantissa, mant_hi;
11620 unsigned HOST_WIDE_INT mask;
11621 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11622 bool fail;
11623
11624 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11625 return -1;
11626
11627 r = *CONST_DOUBLE_REAL_VALUE (x);
11628
11629 /* We can't represent these things, so detect them first. */
11630 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11631 return -1;
11632
11633 /* Extract sign, exponent and mantissa. */
11634 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11635 r = real_value_abs (&r);
11636 exponent = REAL_EXP (&r);
11637 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11638 highest (sign) bit, with a fixed binary point at bit point_pos.
11639 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11640 bits for the mantissa, this may fail (low bits would be lost). */
11641 real_ldexp (&m, &r, point_pos - exponent);
11642 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11643 mantissa = w.elt (0);
11644 mant_hi = w.elt (1);
11645
11646 /* If there are bits set in the low part of the mantissa, we can't
11647 represent this value. */
11648 if (mantissa != 0)
11649 return -1;
11650
11651 /* Now make it so that mantissa contains the most-significant bits, and move
11652 the point_pos to indicate that the least-significant bits have been
11653 discarded. */
11654 point_pos -= HOST_BITS_PER_WIDE_INT;
11655 mantissa = mant_hi;
11656
11657 /* We can permit four significant bits of mantissa only, plus a high bit
11658 which is always 1. */
11659 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11660 if ((mantissa & mask) != 0)
11661 return -1;
11662
11663 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11664 mantissa >>= point_pos - 5;
11665
11666 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11667 floating-point immediate zero with Neon using an integer-zero load, but
11668 that case is handled elsewhere.) */
11669 if (mantissa == 0)
11670 return -1;
11671
11672 gcc_assert (mantissa >= 16 && mantissa <= 31);
11673
11674 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11675 normalized significands are in the range [1, 2). (Our mantissa is shifted
11676 left 4 places at this point relative to normalized IEEE754 values). GCC
11677 internally uses [0.5, 1) (see real.c), so the exponent returned from
11678 REAL_EXP must be altered. */
11679 exponent = 5 - exponent;
11680
11681 if (exponent < 0 || exponent > 7)
11682 return -1;
11683
11684 /* Sign, mantissa and exponent are now in the correct form to plug into the
11685 formula described in the comment above. */
11686 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11687 }
11688
11689 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11690 int
11691 vfp3_const_double_rtx (rtx x)
11692 {
11693 if (!TARGET_VFP3)
11694 return 0;
11695
11696 return vfp3_const_double_index (x) != -1;
11697 }
11698
11699 /* Recognize immediates which can be used in various Neon instructions. Legal
11700 immediates are described by the following table (for VMVN variants, the
11701 bitwise inverse of the constant shown is recognized. In either case, VMOV
11702 is output and the correct instruction to use for a given constant is chosen
11703 by the assembler). The constant shown is replicated across all elements of
11704 the destination vector.
11705
11706 insn elems variant constant (binary)
11707 ---- ----- ------- -----------------
11708 vmov i32 0 00000000 00000000 00000000 abcdefgh
11709 vmov i32 1 00000000 00000000 abcdefgh 00000000
11710 vmov i32 2 00000000 abcdefgh 00000000 00000000
11711 vmov i32 3 abcdefgh 00000000 00000000 00000000
11712 vmov i16 4 00000000 abcdefgh
11713 vmov i16 5 abcdefgh 00000000
11714 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11715 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11716 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11717 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11718 vmvn i16 10 00000000 abcdefgh
11719 vmvn i16 11 abcdefgh 00000000
11720 vmov i32 12 00000000 00000000 abcdefgh 11111111
11721 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11722 vmov i32 14 00000000 abcdefgh 11111111 11111111
11723 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11724 vmov i8 16 abcdefgh
11725 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11726 eeeeeeee ffffffff gggggggg hhhhhhhh
11727 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11728 vmov f32 19 00000000 00000000 00000000 00000000
11729
11730 For case 18, B = !b. Representable values are exactly those accepted by
11731 vfp3_const_double_index, but are output as floating-point numbers rather
11732 than indices.
11733
11734 For case 19, we will change it to vmov.i32 when assembling.
11735
11736 Variants 0-5 (inclusive) may also be used as immediates for the second
11737 operand of VORR/VBIC instructions.
11738
11739 The INVERSE argument causes the bitwise inverse of the given operand to be
11740 recognized instead (used for recognizing legal immediates for the VAND/VORN
11741 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11742 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11743 output, rather than the real insns vbic/vorr).
11744
11745 INVERSE makes no difference to the recognition of float vectors.
11746
11747 The return value is the variant of immediate as shown in the above table, or
11748 -1 if the given value doesn't match any of the listed patterns.
11749 */
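/* An illustrative example (not part of the original table): the V4SImode
   constant with every element equal to 0x00ab0000 matches variant 2 above
   with abcdefgh = 0xab and an element width of 32, so it can be moved with
   a single vmov.i32; its bitwise inverse matches the corresponding vmvn
   form, variant 8.  */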
11750 static int
11751 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11752 rtx *modconst, int *elementwidth)
11753 {
11754 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11755 matches = 1; \
11756 for (i = 0; i < idx; i += (STRIDE)) \
11757 if (!(TEST)) \
11758 matches = 0; \
11759 if (matches) \
11760 { \
11761 immtype = (CLASS); \
11762 elsize = (ELSIZE); \
11763 break; \
11764 }
11765
11766 unsigned int i, elsize = 0, idx = 0, n_elts;
11767 unsigned int innersize;
11768 unsigned char bytes[16];
11769 int immtype = -1, matches;
11770 unsigned int invmask = inverse ? 0xff : 0;
11771 bool vector = GET_CODE (op) == CONST_VECTOR;
11772
11773 if (vector)
11774 n_elts = CONST_VECTOR_NUNITS (op);
11775 else
11776 {
11777 n_elts = 1;
11778 if (mode == VOIDmode)
11779 mode = DImode;
11780 }
11781
11782 innersize = GET_MODE_UNIT_SIZE (mode);
11783
11784 /* Vectors of float constants. */
11785 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11786 {
11787 rtx el0 = CONST_VECTOR_ELT (op, 0);
11788
11789 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11790 return -1;
11791
11792 /* FP16 vectors cannot be represented. */
11793 if (GET_MODE_INNER (mode) == HFmode)
11794 return -1;
11795
11796 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11797 are distinct in this context. */
11798 if (!const_vec_duplicate_p (op))
11799 return -1;
11800
11801 if (modconst)
11802 *modconst = CONST_VECTOR_ELT (op, 0);
11803
11804 if (elementwidth)
11805 *elementwidth = 0;
11806
11807 if (el0 == CONST0_RTX (GET_MODE (el0)))
11808 return 19;
11809 else
11810 return 18;
11811 }
11812
11813 /* The tricks done in the code below apply for little-endian vector layout.
11814 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11815 FIXME: Implement logic for big-endian vectors. */
11816 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11817 return -1;
11818
11819 /* Splat vector constant out into a byte vector. */
11820 for (i = 0; i < n_elts; i++)
11821 {
11822 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11823 unsigned HOST_WIDE_INT elpart;
11824
11825 gcc_assert (CONST_INT_P (el));
11826 elpart = INTVAL (el);
11827
11828 for (unsigned int byte = 0; byte < innersize; byte++)
11829 {
11830 bytes[idx++] = (elpart & 0xff) ^ invmask;
11831 elpart >>= BITS_PER_UNIT;
11832 }
11833 }
11834
11835 /* Sanity check. */
11836 gcc_assert (idx == GET_MODE_SIZE (mode));
11837
11838 do
11839 {
11840 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11841 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11842
11843 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11844 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11845
11846 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11847 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11848
11849 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11850 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11851
11852 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11853
11854 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11855
11856 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11857 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11858
11859 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11860 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11861
11862 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11863 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11864
11865 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11866 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11867
11868 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11869
11870 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11871
11872 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11873 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11874
11875 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11876 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11877
11878 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11879 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11880
11881 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11882 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11883
11884 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11885
11886 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11887 && bytes[i] == bytes[(i + 8) % idx]);
11888 }
11889 while (0);
11890
11891 if (immtype == -1)
11892 return -1;
11893
11894 if (elementwidth)
11895 *elementwidth = elsize;
11896
11897 if (modconst)
11898 {
11899 unsigned HOST_WIDE_INT imm = 0;
11900
11901 /* Un-invert bytes of recognized vector, if necessary. */
11902 if (invmask != 0)
11903 for (i = 0; i < idx; i++)
11904 bytes[i] ^= invmask;
11905
11906 if (immtype == 17)
11907 {
11908 /* FIXME: Broken on 32-bit H_W_I hosts. */
11909 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11910
11911 for (i = 0; i < 8; i++)
11912 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11913 << (i * BITS_PER_UNIT);
11914
11915 *modconst = GEN_INT (imm);
11916 }
11917 else
11918 {
11919 unsigned HOST_WIDE_INT imm = 0;
11920
11921 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11922 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11923
11924 *modconst = GEN_INT (imm);
11925 }
11926 }
11927
11928 return immtype;
11929 #undef CHECK
11930 }
11931
11932 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11933 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11934 float elements), and a modified constant (whatever should be output for a
11935 VMOV) in *MODCONST. */
11936
11937 int
11938 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11939 rtx *modconst, int *elementwidth)
11940 {
11941 rtx tmpconst;
11942 int tmpwidth;
11943 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11944
11945 if (retval == -1)
11946 return 0;
11947
11948 if (modconst)
11949 *modconst = tmpconst;
11950
11951 if (elementwidth)
11952 *elementwidth = tmpwidth;
11953
11954 return 1;
11955 }
11956
11957 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11958 the immediate is valid, write a constant suitable for using as an operand
11959 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11960 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11961
11962 int
11963 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11964 rtx *modconst, int *elementwidth)
11965 {
11966 rtx tmpconst;
11967 int tmpwidth;
11968 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11969
11970 if (retval < 0 || retval > 5)
11971 return 0;
11972
11973 if (modconst)
11974 *modconst = tmpconst;
11975
11976 if (elementwidth)
11977 *elementwidth = tmpwidth;
11978
11979 return 1;
11980 }
11981
11982 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VSHR/VSHL to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
11986 because the two have different limitations. */
11987
11988 int
11989 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11990 rtx *modconst, int *elementwidth,
11991 bool isleftshift)
11992 {
11993 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11994 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11995 unsigned HOST_WIDE_INT last_elt = 0;
11996 unsigned HOST_WIDE_INT maxshift;
11997
11998 /* Check that all elements of the vector constant are the same shift amount. */
11999 for (i = 0; i < n_elts; i++)
12000 {
12001 rtx el = CONST_VECTOR_ELT (op, i);
12002 unsigned HOST_WIDE_INT elpart;
12003
12004 if (CONST_INT_P (el))
12005 elpart = INTVAL (el);
12006 else if (CONST_DOUBLE_P (el))
12007 return 0;
12008 else
12009 gcc_unreachable ();
12010
12011 if (i != 0 && elpart != last_elt)
12012 return 0;
12013
12014 last_elt = elpart;
12015 }
12016
12017 /* Shift less than element size. */
12018 maxshift = innersize * 8;
12019
12020 if (isleftshift)
12021 {
12022 /* Left shift immediate value can be from 0 to <size>-1. */
12023 if (last_elt >= maxshift)
12024 return 0;
12025 }
12026 else
12027 {
12028 /* Right shift immediate value can be from 1 to <size>. */
12029 if (last_elt == 0 || last_elt > maxshift)
12030 return 0;
12031 }
12032
12033 if (elementwidth)
12034 *elementwidth = innersize * 8;
12035
12036 if (modconst)
12037 *modconst = CONST_VECTOR_ELT (op, 0);
12038
12039 return 1;
12040 }
12041
12042 /* Return a string suitable for output of Neon immediate logic operation
12043 MNEM. */
12044
12045 char *
12046 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12047 int inverse, int quad)
12048 {
12049 int width, is_valid;
12050 static char templ[40];
12051
12052 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12053
12054 gcc_assert (is_valid != 0);
12055
12056 if (quad)
12057 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12058 else
12059 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12060
12061 return templ;
12062 }
12063
12064 /* Return a string suitable for output of Neon immediate shift operation
12065 (VSHR or VSHL) MNEM. */
12066
12067 char *
12068 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12069 machine_mode mode, int quad,
12070 bool isleftshift)
12071 {
12072 int width, is_valid;
12073 static char templ[40];
12074
12075 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12076 gcc_assert (is_valid != 0);
12077
12078 if (quad)
12079 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12080 else
12081 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12082
12083 return templ;
12084 }
12085
12086 /* Output a sequence of pairwise operations to implement a reduction.
12087 NOTE: We do "too much work" here, because pairwise operations work on two
12088 registers-worth of operands in one go. Unfortunately, I don't think we can
12089 exploit those extra calculations to do the full operation in fewer steps.
12090 Although all vector elements of the result but the first are ignored, we
12091 actually calculate the same result in each of the elements. An alternative
12092 such as initially loading a vector with zero to use as each of the second
12093 operands would use up an additional register and take an extra instruction,
12094 for no particular gain. */
12095
12096 void
12097 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12098 rtx (*reduc) (rtx, rtx, rtx))
12099 {
12100 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12101 rtx tmpsum = op1;
12102
12103 for (i = parts / 2; i >= 1; i /= 2)
12104 {
12105 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12106 emit_insn (reduc (dest, tmpsum, tmpsum));
12107 tmpsum = dest;
12108 }
12109 }
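/* For example (added for clarity, following the loop above): for a
   four-element mode such as V4SImode, PARTS is 4, so two pairwise
   operations are emitted: the first into a fresh scratch register and
   the second (i == 1) directly into OP0.  A two-element mode such as
   V2SImode needs only the single i == 1 step.  */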
12110
12111 /* If VALS is a vector constant that can be loaded into a register
12112 using VDUP, generate instructions to do so and return an RTX to
12113 assign to the register. Otherwise return NULL_RTX. */
12114
12115 static rtx
12116 neon_vdup_constant (rtx vals)
12117 {
12118 machine_mode mode = GET_MODE (vals);
12119 machine_mode inner_mode = GET_MODE_INNER (mode);
12120 rtx x;
12121
12122 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12123 return NULL_RTX;
12124
12125 if (!const_vec_duplicate_p (vals, &x))
12126 /* The elements are not all the same. We could handle repeating
12127 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12128 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12129 vdup.i16). */
12130 return NULL_RTX;
12131
12132 /* We can load this constant by using VDUP and a constant in a
12133 single ARM register. This will be cheaper than a vector
12134 load. */
12135
12136 x = copy_to_mode_reg (inner_mode, x);
12137 return gen_rtx_VEC_DUPLICATE (mode, x);
12138 }
12139
12140 /* Generate code to load VALS, which is a PARALLEL containing only
12141 constants (for vec_init) or CONST_VECTOR, efficiently into a
12142 register. Returns an RTX to copy into the register, or NULL_RTX
12143 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12144
12145 rtx
12146 neon_make_constant (rtx vals)
12147 {
12148 machine_mode mode = GET_MODE (vals);
12149 rtx target;
12150 rtx const_vec = NULL_RTX;
12151 int n_elts = GET_MODE_NUNITS (mode);
12152 int n_const = 0;
12153 int i;
12154
12155 if (GET_CODE (vals) == CONST_VECTOR)
12156 const_vec = vals;
12157 else if (GET_CODE (vals) == PARALLEL)
12158 {
12159 /* A CONST_VECTOR must contain only CONST_INTs and
12160 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12161 Only store valid constants in a CONST_VECTOR. */
12162 for (i = 0; i < n_elts; ++i)
12163 {
12164 rtx x = XVECEXP (vals, 0, i);
12165 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12166 n_const++;
12167 }
12168 if (n_const == n_elts)
12169 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12170 }
12171 else
12172 gcc_unreachable ();
12173
12174 if (const_vec != NULL
12175 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12176 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12177 return const_vec;
12178 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12179 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12180 pipeline cycle; creating the constant takes one or two ARM
12181 pipeline cycles. */
12182 return target;
12183 else if (const_vec != NULL_RTX)
12184 /* Load from constant pool. On Cortex-A8 this takes two cycles
12185 (for either double or quad vectors). We can not take advantage
12186 of single-cycle VLD1 because we need a PC-relative addressing
12187 mode. */
12188 return const_vec;
12189 else
12190 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12191 We can not construct an initializer. */
12192 return NULL_RTX;
12193 }
12194
12195 /* Initialize vector TARGET to VALS. */
12196
12197 void
12198 neon_expand_vector_init (rtx target, rtx vals)
12199 {
12200 machine_mode mode = GET_MODE (target);
12201 machine_mode inner_mode = GET_MODE_INNER (mode);
12202 int n_elts = GET_MODE_NUNITS (mode);
12203 int n_var = 0, one_var = -1;
12204 bool all_same = true;
12205 rtx x, mem;
12206 int i;
12207
12208 for (i = 0; i < n_elts; ++i)
12209 {
12210 x = XVECEXP (vals, 0, i);
12211 if (!CONSTANT_P (x))
12212 ++n_var, one_var = i;
12213
12214 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12215 all_same = false;
12216 }
12217
12218 if (n_var == 0)
12219 {
12220 rtx constant = neon_make_constant (vals);
12221 if (constant != NULL_RTX)
12222 {
12223 emit_move_insn (target, constant);
12224 return;
12225 }
12226 }
12227
12228 /* Splat a single non-constant element if we can. */
12229 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12230 {
12231 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12232 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12233 return;
12234 }
12235
12236 /* One field is non-constant. Load constant then overwrite varying
12237 field. This is more efficient than using the stack. */
12238 if (n_var == 1)
12239 {
12240 rtx copy = copy_rtx (vals);
12241 rtx index = GEN_INT (one_var);
12242
12243 /* Load constant part of vector, substitute neighboring value for
12244 varying element. */
12245 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12246 neon_expand_vector_init (target, copy);
12247
12248 /* Insert variable. */
12249 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12250 switch (mode)
12251 {
12252 case E_V8QImode:
12253 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12254 break;
12255 case E_V16QImode:
12256 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12257 break;
12258 case E_V4HImode:
12259 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12260 break;
12261 case E_V8HImode:
12262 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12263 break;
12264 case E_V2SImode:
12265 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12266 break;
12267 case E_V4SImode:
12268 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12269 break;
12270 case E_V2SFmode:
12271 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12272 break;
12273 case E_V4SFmode:
12274 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12275 break;
12276 case E_V2DImode:
12277 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12278 break;
12279 default:
12280 gcc_unreachable ();
12281 }
12282 return;
12283 }
12284
12285 /* Construct the vector in memory one field at a time
12286 and load the whole vector. */
12287 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12288 for (i = 0; i < n_elts; i++)
12289 emit_move_insn (adjust_address_nv (mem, inner_mode,
12290 i * GET_MODE_SIZE (inner_mode)),
12291 XVECEXP (vals, 0, i));
12292 emit_move_insn (target, mem);
12293 }
12294
12295 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12296 an error if it doesn't. EXP indicates the source location, which includes the
12297 inlining history for intrinsics. */
12298
12299 static void
12300 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12301 const_tree exp, const char *desc)
12302 {
12303 HOST_WIDE_INT lane;
12304
12305 gcc_assert (CONST_INT_P (operand));
12306
12307 lane = INTVAL (operand);
12308
12309 if (lane < low || lane >= high)
12310 {
12311 if (exp)
12312 error ("%K%s %wd out of range %wd - %wd",
12313 exp, desc, lane, low, high - 1);
12314 else
12315 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12316 }
12317 }
12318
12319 /* Bounds-check lanes. */
12320
12321 void
12322 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12323 const_tree exp)
12324 {
12325 bounds_check (operand, low, high, exp, "lane");
12326 }
12327
12328 /* Bounds-check constants. */
12329
12330 void
12331 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12332 {
12333 bounds_check (operand, low, high, NULL_TREE, "constant");
12334 }
12335
12336 HOST_WIDE_INT
12337 neon_element_bits (machine_mode mode)
12338 {
12339 return GET_MODE_UNIT_BITSIZE (mode);
12340 }
12341
12342 \f
12343 /* Predicates for `match_operand' and `match_operator'. */
12344
12345 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12346 WB is true if full writeback address modes are allowed and is false
12347 if limited writeback address modes (POST_INC and PRE_DEC) are
12348 allowed. */
12349
12350 int
12351 arm_coproc_mem_operand (rtx op, bool wb)
12352 {
12353 rtx ind;
12354
12355 /* Reject eliminable registers. */
12356 if (! (reload_in_progress || reload_completed || lra_in_progress)
12357 && ( reg_mentioned_p (frame_pointer_rtx, op)
12358 || reg_mentioned_p (arg_pointer_rtx, op)
12359 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12360 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12361 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12362 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12363 return FALSE;
12364
12365 /* Constants are converted into offsets from labels. */
12366 if (!MEM_P (op))
12367 return FALSE;
12368
12369 ind = XEXP (op, 0);
12370
12371 if (reload_completed
12372 && (GET_CODE (ind) == LABEL_REF
12373 || (GET_CODE (ind) == CONST
12374 && GET_CODE (XEXP (ind, 0)) == PLUS
12375 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12376 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12377 return TRUE;
12378
12379 /* Match: (mem (reg)). */
12380 if (REG_P (ind))
12381 return arm_address_register_rtx_p (ind, 0);
12382
12383 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12384 acceptable in any case (subject to verification by
12385 arm_address_register_rtx_p). We need WB to be true to accept
12386 PRE_INC and POST_DEC. */
12387 if (GET_CODE (ind) == POST_INC
12388 || GET_CODE (ind) == PRE_DEC
12389 || (wb
12390 && (GET_CODE (ind) == PRE_INC
12391 || GET_CODE (ind) == POST_DEC)))
12392 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12393
12394 if (wb
12395 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12396 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12397 && GET_CODE (XEXP (ind, 1)) == PLUS
12398 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12399 ind = XEXP (ind, 1);
12400
12401 /* Match:
12402 (plus (reg)
12403 (const)). */
12404 if (GET_CODE (ind) == PLUS
12405 && REG_P (XEXP (ind, 0))
12406 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12407 && CONST_INT_P (XEXP (ind, 1))
12408 && INTVAL (XEXP (ind, 1)) > -1024
12409 && INTVAL (XEXP (ind, 1)) < 1024
12410 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12411 return TRUE;
12412
12413 return FALSE;
12414 }
12415
12416 /* Return TRUE if OP is a memory operand which we can load or store a vector
12417 to/from. TYPE is one of the following values:
12418 0 - Vector load/store (vldr)
12419 1 - Core registers (ldm)
12420 2 - Element/structure loads (vld1)
12421 */
12422 int
12423 neon_vector_mem_operand (rtx op, int type, bool strict)
12424 {
12425 rtx ind;
12426
12427 /* Reject eliminable registers. */
12428 if (strict && ! (reload_in_progress || reload_completed)
12429 && (reg_mentioned_p (frame_pointer_rtx, op)
12430 || reg_mentioned_p (arg_pointer_rtx, op)
12431 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12432 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12433 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12434 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12435 return FALSE;
12436
12437 /* Constants are converted into offsets from labels. */
12438 if (!MEM_P (op))
12439 return FALSE;
12440
12441 ind = XEXP (op, 0);
12442
12443 if (reload_completed
12444 && (GET_CODE (ind) == LABEL_REF
12445 || (GET_CODE (ind) == CONST
12446 && GET_CODE (XEXP (ind, 0)) == PLUS
12447 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12448 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12449 return TRUE;
12450
12451 /* Match: (mem (reg)). */
12452 if (REG_P (ind))
12453 return arm_address_register_rtx_p (ind, 0);
12454
12455 /* Allow post-increment with Neon registers. */
12456 if ((type != 1 && GET_CODE (ind) == POST_INC)
12457 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12458 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12459
12460 /* Allow post-increment by register for VLDn */
12461 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12462 && GET_CODE (XEXP (ind, 1)) == PLUS
12463 && REG_P (XEXP (XEXP (ind, 1), 1)))
12464 return true;
12465
12466 /* Match:
12467 (plus (reg)
12468 (const)). */
12469 if (type == 0
12470 && GET_CODE (ind) == PLUS
12471 && REG_P (XEXP (ind, 0))
12472 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12473 && CONST_INT_P (XEXP (ind, 1))
12474 && INTVAL (XEXP (ind, 1)) > -1024
12475 /* For quad modes, we restrict the constant offset to be slightly less
12476 than what the instruction format permits. We have no such constraint
12477 on double mode offsets. (This must match arm_legitimate_index_p.) */
12478 && (INTVAL (XEXP (ind, 1))
12479 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12480 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12481 return TRUE;
12482
12483 return FALSE;
12484 }
12485
12486 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12487 type. */
12488 int
12489 neon_struct_mem_operand (rtx op)
12490 {
12491 rtx ind;
12492
12493 /* Reject eliminable registers. */
12494 if (! (reload_in_progress || reload_completed)
12495 && ( reg_mentioned_p (frame_pointer_rtx, op)
12496 || reg_mentioned_p (arg_pointer_rtx, op)
12497 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12498 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12499 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12500 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12501 return FALSE;
12502
12503 /* Constants are converted into offsets from labels. */
12504 if (!MEM_P (op))
12505 return FALSE;
12506
12507 ind = XEXP (op, 0);
12508
12509 if (reload_completed
12510 && (GET_CODE (ind) == LABEL_REF
12511 || (GET_CODE (ind) == CONST
12512 && GET_CODE (XEXP (ind, 0)) == PLUS
12513 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12514 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12515 return TRUE;
12516
12517 /* Match: (mem (reg)). */
12518 if (REG_P (ind))
12519 return arm_address_register_rtx_p (ind, 0);
12520
12521 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12522 if (GET_CODE (ind) == POST_INC
12523 || GET_CODE (ind) == PRE_DEC)
12524 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12525
12526 return FALSE;
12527 }
12528
12529 /* Return true if X is a register that will be eliminated later on. */
12530 int
12531 arm_eliminable_register (rtx x)
12532 {
12533 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12534 || REGNO (x) == ARG_POINTER_REGNUM
12535 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12536 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12537 }
12538
12539 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12540 coprocessor registers. Otherwise return NO_REGS. */
12541
12542 enum reg_class
12543 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12544 {
12545 if (mode == HFmode)
12546 {
12547 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12548 return GENERAL_REGS;
12549 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12550 return NO_REGS;
12551 return GENERAL_REGS;
12552 }
12553
12554 /* The neon move patterns handle all legitimate vector and struct
12555 addresses. */
12556 if (TARGET_NEON
12557 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12558 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12559 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12560 || VALID_NEON_STRUCT_MODE (mode)))
12561 return NO_REGS;
12562
12563 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12564 return NO_REGS;
12565
12566 return GENERAL_REGS;
12567 }
12568
12569 /* Values which must be returned in the most-significant end of the return
12570 register. */
12571
12572 static bool
12573 arm_return_in_msb (const_tree valtype)
12574 {
12575 return (TARGET_AAPCS_BASED
12576 && BYTES_BIG_ENDIAN
12577 && (AGGREGATE_TYPE_P (valtype)
12578 || TREE_CODE (valtype) == COMPLEX_TYPE
12579 || FIXED_POINT_TYPE_P (valtype)));
12580 }
12581
12582 /* Return TRUE if X references a SYMBOL_REF. */
12583 int
12584 symbol_mentioned_p (rtx x)
12585 {
12586 const char * fmt;
12587 int i;
12588
12589 if (GET_CODE (x) == SYMBOL_REF)
12590 return 1;
12591
12592 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12593 are constant offsets, not symbols. */
12594 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12595 return 0;
12596
12597 fmt = GET_RTX_FORMAT (GET_CODE (x));
12598
12599 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12600 {
12601 if (fmt[i] == 'E')
12602 {
12603 int j;
12604
12605 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12606 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12607 return 1;
12608 }
12609 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12610 return 1;
12611 }
12612
12613 return 0;
12614 }
12615
12616 /* Return TRUE if X references a LABEL_REF. */
12617 int
12618 label_mentioned_p (rtx x)
12619 {
12620 const char * fmt;
12621 int i;
12622
12623 if (GET_CODE (x) == LABEL_REF)
12624 return 1;
12625
12626 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12627 instruction, but they are constant offsets, not symbols. */
12628 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12629 return 0;
12630
12631 fmt = GET_RTX_FORMAT (GET_CODE (x));
12632 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12633 {
12634 if (fmt[i] == 'E')
12635 {
12636 int j;
12637
12638 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12639 if (label_mentioned_p (XVECEXP (x, i, j)))
12640 return 1;
12641 }
12642 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12643 return 1;
12644 }
12645
12646 return 0;
12647 }
12648
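/* Return TRUE if X is an UNSPEC_TLS reference, possibly wrapped in a CONST. */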
12649 int
12650 tls_mentioned_p (rtx x)
12651 {
12652 switch (GET_CODE (x))
12653 {
12654 case CONST:
12655 return tls_mentioned_p (XEXP (x, 0));
12656
12657 case UNSPEC:
12658 if (XINT (x, 1) == UNSPEC_TLS)
12659 return 1;
12660
12661 /* Fall through. */
12662 default:
12663 return 0;
12664 }
12665 }
12666
12667 /* Must not copy any rtx that uses a pc-relative address.
12668 Also, disallow copying of load-exclusive instructions that
12669 may appear after splitting of compare-and-swap-style operations
12670 so as to prevent those loops from being transformed away from their
12671 canonical forms (see PR 69904). */
12672
12673 static bool
12674 arm_cannot_copy_insn_p (rtx_insn *insn)
12675 {
12676 /* The tls call insn cannot be copied, as it is paired with a data
12677 word. */
12678 if (recog_memoized (insn) == CODE_FOR_tlscall)
12679 return true;
12680
12681 subrtx_iterator::array_type array;
12682 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12683 {
12684 const_rtx x = *iter;
12685 if (GET_CODE (x) == UNSPEC
12686 && (XINT (x, 1) == UNSPEC_PIC_BASE
12687 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12688 return true;
12689 }
12690
12691 rtx set = single_set (insn);
12692 if (set)
12693 {
12694 rtx src = SET_SRC (set);
12695 if (GET_CODE (src) == ZERO_EXTEND)
12696 src = XEXP (src, 0);
12697
12698 /* Catch the load-exclusive and load-acquire operations. */
12699 if (GET_CODE (src) == UNSPEC_VOLATILE
12700 && (XINT (src, 1) == VUNSPEC_LL
12701 || XINT (src, 1) == VUNSPEC_LAX))
12702 return true;
12703 }
12704 return false;
12705 }
12706
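/* Return the RTL comparison code corresponding to the min/max operation X,
e.g. GE for SMAX. */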
12707 enum rtx_code
12708 minmax_code (rtx x)
12709 {
12710 enum rtx_code code = GET_CODE (x);
12711
12712 switch (code)
12713 {
12714 case SMAX:
12715 return GE;
12716 case SMIN:
12717 return LE;
12718 case UMIN:
12719 return LEU;
12720 case UMAX:
12721 return GEU;
12722 default:
12723 gcc_unreachable ();
12724 }
12725 }
12726
12727 /* Match pair of min/max operators that can be implemented via usat/ssat. */
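/* For example (illustrative): bounds [0, 255] match with *MASK == 8 and
*SIGNED_SAT == false (an 8-bit usat range), while bounds [-128, 127] match
with *MASK == 8 and *SIGNED_SAT == true (an 8-bit ssat range). */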
12728
12729 bool
12730 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12731 int *mask, bool *signed_sat)
12732 {
12733 /* The high bound must be a power of two minus one. */
12734 int log = exact_log2 (INTVAL (hi_bound) + 1);
12735 if (log == -1)
12736 return false;
12737
12738 /* The low bound is either zero (for usat) or one less than the
12739 negation of the high bound (for ssat). */
12740 if (INTVAL (lo_bound) == 0)
12741 {
12742 if (mask)
12743 *mask = log;
12744 if (signed_sat)
12745 *signed_sat = false;
12746
12747 return true;
12748 }
12749
12750 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12751 {
12752 if (mask)
12753 *mask = log + 1;
12754 if (signed_sat)
12755 *signed_sat = true;
12756
12757 return true;
12758 }
12759
12760 return false;
12761 }
12762
12763 /* Return 1 if memory locations are adjacent. */
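/* Roughly speaking, (mem (reg r3)) and (mem (plus (reg r3) (const_int 4)))
are adjacent, subject to the extra restrictions checked below (no volatile
references, no eliminable base register, and on load-delay-slot targets
only when optimizing for size). */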
12764 int
12765 adjacent_mem_locations (rtx a, rtx b)
12766 {
12767 /* We don't guarantee to preserve the order of these memory refs. */
12768 if (volatile_refs_p (a) || volatile_refs_p (b))
12769 return 0;
12770
12771 if ((REG_P (XEXP (a, 0))
12772 || (GET_CODE (XEXP (a, 0)) == PLUS
12773 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12774 && (REG_P (XEXP (b, 0))
12775 || (GET_CODE (XEXP (b, 0)) == PLUS
12776 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12777 {
12778 HOST_WIDE_INT val0 = 0, val1 = 0;
12779 rtx reg0, reg1;
12780 int val_diff;
12781
12782 if (GET_CODE (XEXP (a, 0)) == PLUS)
12783 {
12784 reg0 = XEXP (XEXP (a, 0), 0);
12785 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12786 }
12787 else
12788 reg0 = XEXP (a, 0);
12789
12790 if (GET_CODE (XEXP (b, 0)) == PLUS)
12791 {
12792 reg1 = XEXP (XEXP (b, 0), 0);
12793 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12794 }
12795 else
12796 reg1 = XEXP (b, 0);
12797
12798 /* Don't accept any offset that will require multiple
12799 instructions to handle, since this would cause the
12800 arith_adjacentmem pattern to output an overlong sequence. */
12801 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12802 return 0;
12803
12804 /* Don't allow an eliminable register: register elimination can make
12805 the offset too large. */
12806 if (arm_eliminable_register (reg0))
12807 return 0;
12808
12809 val_diff = val1 - val0;
12810
12811 if (arm_ld_sched)
12812 {
12813 /* If the target has load delay slots, then there's no benefit
12814 to using an ldm instruction unless the offset is zero and
12815 we are optimizing for size. */
12816 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12817 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12818 && (val_diff == 4 || val_diff == -4));
12819 }
12820
12821 return ((REGNO (reg0) == REGNO (reg1))
12822 && (val_diff == 4 || val_diff == -4));
12823 }
12824
12825 return 0;
12826 }
12827
12828 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12829 for load operations, false for store operations. CONSECUTIVE is true
12830 if the register numbers in the operation must be consecutive in the register
12831 bank. RETURN_PC is true if the value is to be loaded into the PC.
12832 The pattern we are trying to match for load is:
12833 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12834 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12835 :
12836 :
12837 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12838 ]
12839 where
12840 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12841 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12842 3. If consecutive is TRUE, then for kth register being loaded,
12843 REGNO (R_dk) = REGNO (R_d0) + k.
12844 The pattern for store is similar. */
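/* Purely for illustration, a two-register SImode load with base write-back
would be matched in the form:
[(SET (Rb) (PLUS (Rb) (const_int 8)))
(SET (R_d0) (MEM (Rb)))
(SET (R_d1) (MEM (PLUS (Rb) (const_int 4))))
]
which corresponds to something like "ldmia Rb!, {R_d0, R_d1}". */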
12845 bool
12846 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12847 bool consecutive, bool return_pc)
12848 {
12849 HOST_WIDE_INT count = XVECLEN (op, 0);
12850 rtx reg, mem, addr;
12851 unsigned regno;
12852 unsigned first_regno;
12853 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12854 rtx elt;
12855 bool addr_reg_in_reglist = false;
12856 bool update = false;
12857 int reg_increment;
12858 int offset_adj;
12859 int regs_per_val;
12860
12861 /* If not in SImode, then registers must be consecutive
12862 (e.g., VLDM instructions for DFmode). */
12863 gcc_assert ((mode == SImode) || consecutive);
12864 /* Setting return_pc for stores is illegal. */
12865 gcc_assert (!return_pc || load);
12866
12867 /* Set up the increments and the regs per val based on the mode. */
12868 reg_increment = GET_MODE_SIZE (mode);
12869 regs_per_val = reg_increment / 4;
12870 offset_adj = return_pc ? 1 : 0;
12871
12872 if (count <= 1
12873 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12874 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12875 return false;
12876
12877 /* Check if this is a write-back. */
12878 elt = XVECEXP (op, 0, offset_adj);
12879 if (GET_CODE (SET_SRC (elt)) == PLUS)
12880 {
12881 i++;
12882 base = 1;
12883 update = true;
12884
12885 /* The offset adjustment must be the number of registers being
12886 popped times the size of a single register. */
12887 if (!REG_P (SET_DEST (elt))
12888 || !REG_P (XEXP (SET_SRC (elt), 0))
12889 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12890 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12891 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12892 ((count - 1 - offset_adj) * reg_increment))
12893 return false;
12894 }
12895
12896 i = i + offset_adj;
12897 base = base + offset_adj;
12898 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12899 success depends on the type: VLDM can do just one reg,
12900 LDM must do at least two. */
12901 if ((count <= i) && (mode == SImode))
12902 return false;
12903
12904 elt = XVECEXP (op, 0, i - 1);
12905 if (GET_CODE (elt) != SET)
12906 return false;
12907
12908 if (load)
12909 {
12910 reg = SET_DEST (elt);
12911 mem = SET_SRC (elt);
12912 }
12913 else
12914 {
12915 reg = SET_SRC (elt);
12916 mem = SET_DEST (elt);
12917 }
12918
12919 if (!REG_P (reg) || !MEM_P (mem))
12920 return false;
12921
12922 regno = REGNO (reg);
12923 first_regno = regno;
12924 addr = XEXP (mem, 0);
12925 if (GET_CODE (addr) == PLUS)
12926 {
12927 if (!CONST_INT_P (XEXP (addr, 1)))
12928 return false;
12929
12930 offset = INTVAL (XEXP (addr, 1));
12931 addr = XEXP (addr, 0);
12932 }
12933
12934 if (!REG_P (addr))
12935 return false;
12936
12937 /* Don't allow SP to be loaded unless it is also the base register. It
12938 guarantees that SP is reset correctly when an LDM instruction
12939 is interrupted. Otherwise, we might end up with a corrupt stack. */
12940 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12941 return false;
12942
12943 for (; i < count; i++)
12944 {
12945 elt = XVECEXP (op, 0, i);
12946 if (GET_CODE (elt) != SET)
12947 return false;
12948
12949 if (load)
12950 {
12951 reg = SET_DEST (elt);
12952 mem = SET_SRC (elt);
12953 }
12954 else
12955 {
12956 reg = SET_SRC (elt);
12957 mem = SET_DEST (elt);
12958 }
12959
12960 if (!REG_P (reg)
12961 || GET_MODE (reg) != mode
12962 || REGNO (reg) <= regno
12963 || (consecutive
12964 && (REGNO (reg) !=
12965 (unsigned int) (first_regno + regs_per_val * (i - base))))
12966 /* Don't allow SP to be loaded unless it is also the base register. It
12967 guarantees that SP is reset correctly when an LDM instruction
12968 is interrupted. Otherwise, we might end up with a corrupt stack. */
12969 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12970 || !MEM_P (mem)
12971 || GET_MODE (mem) != mode
12972 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12973 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12974 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12975 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12976 offset + (i - base) * reg_increment))
12977 && (!REG_P (XEXP (mem, 0))
12978 || offset + (i - base) * reg_increment != 0)))
12979 return false;
12980
12981 regno = REGNO (reg);
12982 if (regno == REGNO (addr))
12983 addr_reg_in_reglist = true;
12984 }
12985
12986 if (load)
12987 {
12988 if (update && addr_reg_in_reglist)
12989 return false;
12990
12991 /* For Thumb-1, the address register is always modified - either by write-back
12992 or by an explicit load. If the pattern does not describe an update,
12993 then the address register must be in the list of loaded registers. */
12994 if (TARGET_THUMB1)
12995 return update || addr_reg_in_reglist;
12996 }
12997
12998 return true;
12999 }
13000
13001 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13002 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13003 instruction. ADD_OFFSET is nonzero if the base address register needs
13004 to be modified with an add instruction before we can use it. */
13005
13006 static bool
13007 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13008 int nops, HOST_WIDE_INT add_offset)
13009 {
13010 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13011 if the offset isn't small enough. The reason 2 ldrs are faster
13012 is because these ARMs are able to do more than one cache access
13013 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13014 whilst the ARM8 has a double bandwidth cache. This means that
13015 these cores can do both an instruction fetch and a data fetch in
13016 a single cycle, so the trick of calculating the address into a
13017 scratch register (one of the result regs) and then doing a load
13018 multiple actually becomes slower (and no smaller in code size).
13019 That is the transformation
13020
13021 ldr rd1, [rbase + offset]
13022 ldr rd2, [rbase + offset + 4]
13023
13024 to
13025
13026 add rd1, rbase, offset
13027 ldmia rd1, {rd1, rd2}
13028
13029 produces worse code -- '3 cycles + any stalls on rd2' instead of
13030 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13031 access per cycle, the first sequence could never complete in less
13032 than 6 cycles, whereas the ldm sequence would only take 5 and
13033 would make better use of sequential accesses if not hitting the
13034 cache.
13035
13036 We cheat here and test 'arm_ld_sched' which we currently know to
13037 only be true for the ARM8, ARM9 and StrongARM. If this ever
13038 changes, then the test below needs to be reworked. */
13039 if (nops == 2 && arm_ld_sched && add_offset != 0)
13040 return false;
13041
13042 /* XScale has load-store double instructions, but they have stricter
13043 alignment requirements than load-store multiple, so we cannot
13044 use them.
13045
13046 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13047 the pipeline until completion.
13048
13049 NREGS CYCLES
13050 1 3
13051 2 4
13052 3 5
13053 4 6
13054
13055 An ldr instruction takes 1-3 cycles, but does not block the
13056 pipeline.
13057
13058 NREGS CYCLES
13059 1 1-3
13060 2 2-6
13061 3 3-9
13062 4 4-12
13063
13064 Best case ldr will always win. However, the more ldr instructions
13065 we issue, the less likely we are to be able to schedule them well.
13066 Using ldr instructions also increases code size.
13067
13068 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13069 for counts of 3 or 4 regs. */
13070 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13071 return false;
13072 return true;
13073 }
13074
13075 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13076 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13077 an array ORDER which describes the sequence to use when accessing the
13078 offsets that produces an ascending order. In this sequence, each
13079 offset must be larger by exactly 4 than the previous one. ORDER[0]
13080 must have been filled in with the lowest offset by the caller.
13081 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13082 we use to verify that ORDER produces an ascending order of registers.
13083 Return true if it was possible to construct such an order, false if
13084 not. */
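/* A small worked example (illustrative only): with UNSORTED_OFFSETS
{8, 0, 4} and ORDER[0] == 1, the loop below produces ORDER == {1, 2, 0};
with UNSORTED_OFFSETS {0, 8, 12} it fails, because no offset is exactly
4 greater than 0. */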
13085
13086 static bool
13087 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13088 int *unsorted_regs)
13089 {
13090 int i;
13091 for (i = 1; i < nops; i++)
13092 {
13093 int j;
13094
13095 order[i] = order[i - 1];
13096 for (j = 0; j < nops; j++)
13097 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13098 {
13099 /* We must find exactly one offset that is higher than the
13100 previous one by 4. */
13101 if (order[i] != order[i - 1])
13102 return false;
13103 order[i] = j;
13104 }
13105 if (order[i] == order[i - 1])
13106 return false;
13107 /* The register numbers must be ascending. */
13108 if (unsorted_regs != NULL
13109 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13110 return false;
13111 }
13112 return true;
13113 }
13114
13115 /* Used to determine in a peephole whether a sequence of load
13116 instructions can be changed into a load-multiple instruction.
13117 NOPS is the number of separate load instructions we are examining. The
13118 first NOPS entries in OPERANDS are the destination registers, the
13119 next NOPS entries are memory operands. If this function is
13120 successful, *BASE is set to the common base register of the memory
13121 accesses; *LOAD_OFFSET is set to the first memory location's offset
13122 from that base register.
13123 REGS is an array filled in with the destination register numbers.
13124 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13125 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13126 the sequence of registers in REGS matches the loads from ascending memory
13127 locations, and the function verifies that the register numbers are
13128 themselves ascending. If CHECK_REGS is false, the register numbers
13129 are stored in the order they are found in the operands. */
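/* For instance (illustrative only), the two-load sequence
ldr r4, [r0]
ldr r5, [r0, #4]
would typically yield case 1 (ldmia) with REGS == {4, 5}, *BASE == 0 (r0)
and *LOAD_OFFSET == 0, although the profitability check below may still
reject it on some tunings. */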
13130 static int
13131 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13132 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13133 {
13134 int unsorted_regs[MAX_LDM_STM_OPS];
13135 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13136 int order[MAX_LDM_STM_OPS];
13137 rtx base_reg_rtx = NULL;
13138 int base_reg = -1;
13139 int i, ldm_case;
13140
13141 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13142 easily extended if required. */
13143 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13144
13145 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13146
13147 /* Loop over the operands and check that the memory references are
13148 suitable (i.e. immediate offsets from the same base register). At
13149 the same time, extract the target register, and the memory
13150 offsets. */
13151 for (i = 0; i < nops; i++)
13152 {
13153 rtx reg;
13154 rtx offset;
13155
13156 /* Convert a subreg of a mem into the mem itself. */
13157 if (GET_CODE (operands[nops + i]) == SUBREG)
13158 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13159
13160 gcc_assert (MEM_P (operands[nops + i]));
13161
13162 /* Don't reorder volatile memory references; it doesn't seem worth
13163 looking for the case where the order is ok anyway. */
13164 if (MEM_VOLATILE_P (operands[nops + i]))
13165 return 0;
13166
13167 offset = const0_rtx;
13168
13169 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13170 || (GET_CODE (reg) == SUBREG
13171 && REG_P (reg = SUBREG_REG (reg))))
13172 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13173 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13174 || (GET_CODE (reg) == SUBREG
13175 && REG_P (reg = SUBREG_REG (reg))))
13176 && (CONST_INT_P (offset
13177 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13178 {
13179 if (i == 0)
13180 {
13181 base_reg = REGNO (reg);
13182 base_reg_rtx = reg;
13183 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13184 return 0;
13185 }
13186 else if (base_reg != (int) REGNO (reg))
13187 /* Not addressed from the same base register. */
13188 return 0;
13189
13190 unsorted_regs[i] = (REG_P (operands[i])
13191 ? REGNO (operands[i])
13192 : REGNO (SUBREG_REG (operands[i])));
13193
13194 /* If it isn't an integer register, or if it overwrites the
13195 base register but isn't the last insn in the list, then
13196 we can't do this. */
13197 if (unsorted_regs[i] < 0
13198 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13199 || unsorted_regs[i] > 14
13200 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13201 return 0;
13202
13203 /* Don't allow SP to be loaded unless it is also the base
13204 register. It guarantees that SP is reset correctly when
13205 an LDM instruction is interrupted. Otherwise, we might
13206 end up with a corrupt stack. */
13207 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13208 return 0;
13209
13210 unsorted_offsets[i] = INTVAL (offset);
13211 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13212 order[0] = i;
13213 }
13214 else
13215 /* Not a suitable memory address. */
13216 return 0;
13217 }
13218
13219 /* All the useful information has now been extracted from the
13220 operands into unsorted_regs and unsorted_offsets; additionally,
13221 order[0] has been set to the lowest offset in the list. Sort
13222 the offsets into order, verifying that they are adjacent, and
13223 check that the register numbers are ascending. */
13224 if (!compute_offset_order (nops, unsorted_offsets, order,
13225 check_regs ? unsorted_regs : NULL))
13226 return 0;
13227
13228 if (saved_order)
13229 memcpy (saved_order, order, sizeof order);
13230
13231 if (base)
13232 {
13233 *base = base_reg;
13234
13235 for (i = 0; i < nops; i++)
13236 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13237
13238 *load_offset = unsorted_offsets[order[0]];
13239 }
13240
13241 if (TARGET_THUMB1
13242 && !peep2_reg_dead_p (nops, base_reg_rtx))
13243 return 0;
13244
13245 if (unsorted_offsets[order[0]] == 0)
13246 ldm_case = 1; /* ldmia */
13247 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13248 ldm_case = 2; /* ldmib */
13249 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13250 ldm_case = 3; /* ldmda */
13251 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13252 ldm_case = 4; /* ldmdb */
13253 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13254 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13255 ldm_case = 5;
13256 else
13257 return 0;
13258
13259 if (!multiple_operation_profitable_p (false, nops,
13260 ldm_case == 5
13261 ? unsorted_offsets[order[0]] : 0))
13262 return 0;
13263
13264 return ldm_case;
13265 }
13266
13267 /* Used to determine in a peephole whether a sequence of store instructions can
13268 be changed into a store-multiple instruction.
13269 NOPS is the number of separate store instructions we are examining.
13270 NOPS_TOTAL is the total number of instructions recognized by the peephole
13271 pattern.
13272 The first NOPS entries in OPERANDS are the source registers, the next
13273 NOPS entries are memory operands. If this function is successful, *BASE is
13274 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13275 to the first memory location's offset from that base register. REGS is an
13276 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13277 likewise filled with the corresponding rtx's.
13278 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13279 numbers to an ascending order of stores.
13280 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13281 from ascending memory locations, and the function verifies that the register
13282 numbers are themselves ascending. If CHECK_REGS is false, the register
13283 numbers are stored in the order they are found in the operands. */
13284 static int
13285 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13286 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13287 HOST_WIDE_INT *load_offset, bool check_regs)
13288 {
13289 int unsorted_regs[MAX_LDM_STM_OPS];
13290 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13291 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13292 int order[MAX_LDM_STM_OPS];
13293 int base_reg = -1;
13294 rtx base_reg_rtx = NULL;
13295 int i, stm_case;
13296
13297 /* Write-back of the base register is currently only supported for Thumb-1. */
13298 int base_writeback = TARGET_THUMB1;
13299
13300 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13301 easily extended if required. */
13302 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13303
13304 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13305
13306 /* Loop over the operands and check that the memory references are
13307 suitable (i.e. immediate offsets from the same base register). At
13308 the same time, extract the target register, and the memory
13309 offsets. */
13310 for (i = 0; i < nops; i++)
13311 {
13312 rtx reg;
13313 rtx offset;
13314
13315 /* Convert a subreg of a mem into the mem itself. */
13316 if (GET_CODE (operands[nops + i]) == SUBREG)
13317 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13318
13319 gcc_assert (MEM_P (operands[nops + i]));
13320
13321 /* Don't reorder volatile memory references; it doesn't seem worth
13322 looking for the case where the order is ok anyway. */
13323 if (MEM_VOLATILE_P (operands[nops + i]))
13324 return 0;
13325
13326 offset = const0_rtx;
13327
13328 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13329 || (GET_CODE (reg) == SUBREG
13330 && REG_P (reg = SUBREG_REG (reg))))
13331 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13332 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13333 || (GET_CODE (reg) == SUBREG
13334 && REG_P (reg = SUBREG_REG (reg))))
13335 && (CONST_INT_P (offset
13336 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13337 {
13338 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13339 ? operands[i] : SUBREG_REG (operands[i]));
13340 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13341
13342 if (i == 0)
13343 {
13344 base_reg = REGNO (reg);
13345 base_reg_rtx = reg;
13346 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13347 return 0;
13348 }
13349 else if (base_reg != (int) REGNO (reg))
13350 /* Not addressed from the same base register. */
13351 return 0;
13352
13353 /* If it isn't an integer register, then we can't do this. */
13354 if (unsorted_regs[i] < 0
13355 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13356 /* The effects are unpredictable if the base register is
13357 both updated and stored. */
13358 || (base_writeback && unsorted_regs[i] == base_reg)
13359 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13360 || unsorted_regs[i] > 14)
13361 return 0;
13362
13363 unsorted_offsets[i] = INTVAL (offset);
13364 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13365 order[0] = i;
13366 }
13367 else
13368 /* Not a suitable memory address. */
13369 return 0;
13370 }
13371
13372 /* All the useful information has now been extracted from the
13373 operands into unsorted_regs and unsorted_offsets; additionally,
13374 order[0] has been set to the lowest offset in the list. Sort
13375 the offsets into order, verifying that they are adjacent, and
13376 check that the register numbers are ascending. */
13377 if (!compute_offset_order (nops, unsorted_offsets, order,
13378 check_regs ? unsorted_regs : NULL))
13379 return 0;
13380
13381 if (saved_order)
13382 memcpy (saved_order, order, sizeof order);
13383
13384 if (base)
13385 {
13386 *base = base_reg;
13387
13388 for (i = 0; i < nops; i++)
13389 {
13390 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13391 if (reg_rtxs)
13392 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13393 }
13394
13395 *load_offset = unsorted_offsets[order[0]];
13396 }
13397
13398 if (TARGET_THUMB1
13399 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13400 return 0;
13401
13402 if (unsorted_offsets[order[0]] == 0)
13403 stm_case = 1; /* stmia */
13404 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13405 stm_case = 2; /* stmib */
13406 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13407 stm_case = 3; /* stmda */
13408 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13409 stm_case = 4; /* stmdb */
13410 else
13411 return 0;
13412
13413 if (!multiple_operation_profitable_p (false, nops, 0))
13414 return 0;
13415
13416 return stm_case;
13417 }
13418 \f
13419 /* Routines for use in generating RTL. */
13420
13421 /* Generate a load-multiple instruction. COUNT is the number of loads in
13422 the instruction; REGS and MEMS are arrays containing the operands.
13423 BASEREG is the base register to be used in addressing the memory operands.
13424 WBACK_OFFSET is nonzero if the instruction should update the base
13425 register. */
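/* A sketch of the RTL produced for COUNT == 2 with a WBACK_OFFSET of 8,
assuming the multiple-operation form is profitable:
(parallel [(set (basereg) (plus (basereg) (const_int 8)))
(set (reg:SI regs[0]) mems[0])
(set (reg:SI regs[1]) mems[1])]) */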
13426
13427 static rtx
13428 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13429 HOST_WIDE_INT wback_offset)
13430 {
13431 int i = 0, j;
13432 rtx result;
13433
13434 if (!multiple_operation_profitable_p (false, count, 0))
13435 {
13436 rtx seq;
13437
13438 start_sequence ();
13439
13440 for (i = 0; i < count; i++)
13441 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13442
13443 if (wback_offset != 0)
13444 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13445
13446 seq = get_insns ();
13447 end_sequence ();
13448
13449 return seq;
13450 }
13451
13452 result = gen_rtx_PARALLEL (VOIDmode,
13453 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13454 if (wback_offset != 0)
13455 {
13456 XVECEXP (result, 0, 0)
13457 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13458 i = 1;
13459 count++;
13460 }
13461
13462 for (j = 0; i < count; i++, j++)
13463 XVECEXP (result, 0, i)
13464 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13465
13466 return result;
13467 }
13468
13469 /* Generate a store-multiple instruction. COUNT is the number of stores in
13470 the instruction; REGS and MEMS are arrays containing the operands.
13471 BASEREG is the base register to be used in addressing the memory operands.
13472 WBACK_OFFSET is nonzero if the instruction should update the base
13473 register. */
13474
13475 static rtx
13476 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13477 HOST_WIDE_INT wback_offset)
13478 {
13479 int i = 0, j;
13480 rtx result;
13481
13482 if (GET_CODE (basereg) == PLUS)
13483 basereg = XEXP (basereg, 0);
13484
13485 if (!multiple_operation_profitable_p (false, count, 0))
13486 {
13487 rtx seq;
13488
13489 start_sequence ();
13490
13491 for (i = 0; i < count; i++)
13492 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13493
13494 if (wback_offset != 0)
13495 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13496
13497 seq = get_insns ();
13498 end_sequence ();
13499
13500 return seq;
13501 }
13502
13503 result = gen_rtx_PARALLEL (VOIDmode,
13504 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13505 if (wback_offset != 0)
13506 {
13507 XVECEXP (result, 0, 0)
13508 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13509 i = 1;
13510 count++;
13511 }
13512
13513 for (j = 0; i < count; i++, j++)
13514 XVECEXP (result, 0, i)
13515 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13516
13517 return result;
13518 }
13519
13520 /* Generate either a load-multiple or a store-multiple instruction. This
13521 function can be used in situations where we can start with a single MEM
13522 rtx and adjust its address upwards.
13523 COUNT is the number of operations in the instruction, not counting a
13524 possible update of the base register. REGS is an array containing the
13525 register operands.
13526 BASEREG is the base register to be used in addressing the memory operands,
13527 which are constructed from BASEMEM.
13528 WRITE_BACK specifies whether the generated instruction should include an
13529 update of the base register.
13530 OFFSETP is used to pass an offset to and from this function; this offset
13531 is not used when constructing the address (instead BASEMEM should have an
13532 appropriate offset in its address), it is used only for setting
13533 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13534
13535 static rtx
13536 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13537 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13538 {
13539 rtx mems[MAX_LDM_STM_OPS];
13540 HOST_WIDE_INT offset = *offsetp;
13541 int i;
13542
13543 gcc_assert (count <= MAX_LDM_STM_OPS);
13544
13545 if (GET_CODE (basereg) == PLUS)
13546 basereg = XEXP (basereg, 0);
13547
13548 for (i = 0; i < count; i++)
13549 {
13550 rtx addr = plus_constant (Pmode, basereg, i * 4);
13551 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13552 offset += 4;
13553 }
13554
13555 if (write_back)
13556 *offsetp = offset;
13557
13558 if (is_load)
13559 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13560 write_back ? 4 * count : 0);
13561 else
13562 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13563 write_back ? 4 * count : 0);
13564 }
13565
13566 rtx
13567 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13568 rtx basemem, HOST_WIDE_INT *offsetp)
13569 {
13570 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13571 offsetp);
13572 }
13573
13574 rtx
13575 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13576 rtx basemem, HOST_WIDE_INT *offsetp)
13577 {
13578 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13579 offsetp);
13580 }
13581
13582 /* Called from a peephole2 expander to turn a sequence of loads into an
13583 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13584 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13585 is true if we can reorder the registers because they are subsequently used
13586 commutatively.
13587 Returns true iff we could generate a new instruction. */
13588
13589 bool
13590 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13591 {
13592 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13593 rtx mems[MAX_LDM_STM_OPS];
13594 int i, j, base_reg;
13595 rtx base_reg_rtx;
13596 HOST_WIDE_INT offset;
13597 int write_back = FALSE;
13598 int ldm_case;
13599 rtx addr;
13600
13601 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13602 &base_reg, &offset, !sort_regs);
13603
13604 if (ldm_case == 0)
13605 return false;
13606
13607 if (sort_regs)
13608 for (i = 0; i < nops - 1; i++)
13609 for (j = i + 1; j < nops; j++)
13610 if (regs[i] > regs[j])
13611 {
13612 int t = regs[i];
13613 regs[i] = regs[j];
13614 regs[j] = t;
13615 }
13616 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13617
13618 if (TARGET_THUMB1)
13619 {
13620 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13621 gcc_assert (ldm_case == 1 || ldm_case == 5);
13622 write_back = TRUE;
13623 }
13624
13625 if (ldm_case == 5)
13626 {
13627 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13628 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13629 offset = 0;
13630 if (!TARGET_THUMB1)
13631 base_reg_rtx = newbase;
13632 }
13633
13634 for (i = 0; i < nops; i++)
13635 {
13636 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13637 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13638 SImode, addr, 0);
13639 }
13640 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13641 write_back ? offset + i * 4 : 0));
13642 return true;
13643 }
13644
13645 /* Called from a peephole2 expander to turn a sequence of stores into an
13646 STM instruction. OPERANDS are the operands found by the peephole matcher;
13647 NOPS indicates how many separate stores we are trying to combine.
13648 Returns true iff we could generate a new instruction. */
13649
13650 bool
13651 gen_stm_seq (rtx *operands, int nops)
13652 {
13653 int i;
13654 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13655 rtx mems[MAX_LDM_STM_OPS];
13656 int base_reg;
13657 rtx base_reg_rtx;
13658 HOST_WIDE_INT offset;
13659 int write_back = FALSE;
13660 int stm_case;
13661 rtx addr;
13662 bool base_reg_dies;
13663
13664 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13665 mem_order, &base_reg, &offset, true);
13666
13667 if (stm_case == 0)
13668 return false;
13669
13670 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13671
13672 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13673 if (TARGET_THUMB1)
13674 {
13675 gcc_assert (base_reg_dies);
13676 write_back = TRUE;
13677 }
13678
13679 if (stm_case == 5)
13680 {
13681 gcc_assert (base_reg_dies);
13682 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13683 offset = 0;
13684 }
13685
13686 addr = plus_constant (Pmode, base_reg_rtx, offset);
13687
13688 for (i = 0; i < nops; i++)
13689 {
13690 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13691 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13692 SImode, addr, 0);
13693 }
13694 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13695 write_back ? offset + i * 4 : 0));
13696 return true;
13697 }
13698
13699 /* Called from a peephole2 expander to turn a sequence of stores that are
13700 preceded by constant loads into an STM instruction. OPERANDS are the
13701 operands found by the peephole matcher; NOPS indicates how many
13702 separate stores we are trying to combine; there are 2 * NOPS
13703 instructions in the peephole.
13704 Returns true iff we could generate a new instruction. */
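/* Illustrative example only: a sequence along the lines of
mov r4, #1
mov r5, #2
str r4, [r0]
str r5, [r0, #4]
can be turned into the constant loads followed by a single
"stmia r0, {r4, r5}" (with write-back of the base on Thumb-1). */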
13705
13706 bool
13707 gen_const_stm_seq (rtx *operands, int nops)
13708 {
13709 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13710 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13711 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13712 rtx mems[MAX_LDM_STM_OPS];
13713 int base_reg;
13714 rtx base_reg_rtx;
13715 HOST_WIDE_INT offset;
13716 int write_back = FALSE;
13717 int stm_case;
13718 rtx addr;
13719 bool base_reg_dies;
13720 int i, j;
13721 HARD_REG_SET allocated;
13722
13723 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13724 mem_order, &base_reg, &offset, false);
13725
13726 if (stm_case == 0)
13727 return false;
13728
13729 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13730
13731 /* If the same register is used more than once, try to find a free
13732 register. */
13733 CLEAR_HARD_REG_SET (allocated);
13734 for (i = 0; i < nops; i++)
13735 {
13736 for (j = i + 1; j < nops; j++)
13737 if (regs[i] == regs[j])
13738 {
13739 rtx t = peep2_find_free_register (0, nops * 2,
13740 TARGET_THUMB1 ? "l" : "r",
13741 SImode, &allocated);
13742 if (t == NULL_RTX)
13743 return false;
13744 reg_rtxs[i] = t;
13745 regs[i] = REGNO (t);
13746 }
13747 }
13748
13749 /* Compute an ordering that maps the register numbers to an ascending
13750 sequence. */
13751 reg_order[0] = 0;
13752 for (i = 0; i < nops; i++)
13753 if (regs[i] < regs[reg_order[0]])
13754 reg_order[0] = i;
13755
13756 for (i = 1; i < nops; i++)
13757 {
13758 int this_order = reg_order[i - 1];
13759 for (j = 0; j < nops; j++)
13760 if (regs[j] > regs[reg_order[i - 1]]
13761 && (this_order == reg_order[i - 1]
13762 || regs[j] < regs[this_order]))
13763 this_order = j;
13764 reg_order[i] = this_order;
13765 }
13766
13767 /* Ensure that registers that must be live after the instruction end
13768 up with the correct value. */
13769 for (i = 0; i < nops; i++)
13770 {
13771 int this_order = reg_order[i];
13772 if ((this_order != mem_order[i]
13773 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13774 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13775 return false;
13776 }
13777
13778 /* Load the constants. */
13779 for (i = 0; i < nops; i++)
13780 {
13781 rtx op = operands[2 * nops + mem_order[i]];
13782 sorted_regs[i] = regs[reg_order[i]];
13783 emit_move_insn (reg_rtxs[reg_order[i]], op);
13784 }
13785
13786 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13787
13788 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13789 if (TARGET_THUMB1)
13790 {
13791 gcc_assert (base_reg_dies);
13792 write_back = TRUE;
13793 }
13794
13795 if (stm_case == 5)
13796 {
13797 gcc_assert (base_reg_dies);
13798 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13799 offset = 0;
13800 }
13801
13802 addr = plus_constant (Pmode, base_reg_rtx, offset);
13803
13804 for (i = 0; i < nops; i++)
13805 {
13806 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13807 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13808 SImode, addr, 0);
13809 }
13810 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13811 write_back ? offset + i * 4 : 0));
13812 return true;
13813 }
13814
13815 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13816 unaligned copies on processors which support unaligned semantics for those
13817 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13818 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13819 An interleave factor of 1 (the minimum) will perform no interleaving.
13820 Load/store multiple are used for aligned addresses where possible. */
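/* As a rough example, LENGTH == 11 with INTERLEAVE_FACTOR == 2 and neither
side word-aligned expands to two unaligned word loads followed by two
unaligned word stores (one 8-byte block), then a halfword copy and a
final byte copy. */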
13821
13822 static void
13823 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13824 HOST_WIDE_INT length,
13825 unsigned int interleave_factor)
13826 {
13827 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13828 int *regnos = XALLOCAVEC (int, interleave_factor);
13829 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13830 HOST_WIDE_INT i, j;
13831 HOST_WIDE_INT remaining = length, words;
13832 rtx halfword_tmp = NULL, byte_tmp = NULL;
13833 rtx dst, src;
13834 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13835 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13836 HOST_WIDE_INT srcoffset, dstoffset;
13837 HOST_WIDE_INT src_autoinc, dst_autoinc;
13838 rtx mem, addr;
13839
13840 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13841
13842 /* Use hard registers if we have aligned source or destination so we can use
13843 load/store multiple with contiguous registers. */
13844 if (dst_aligned || src_aligned)
13845 for (i = 0; i < interleave_factor; i++)
13846 regs[i] = gen_rtx_REG (SImode, i);
13847 else
13848 for (i = 0; i < interleave_factor; i++)
13849 regs[i] = gen_reg_rtx (SImode);
13850
13851 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13852 src = copy_addr_to_reg (XEXP (srcbase, 0));
13853
13854 srcoffset = dstoffset = 0;
13855
13856 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13857 For copying the last bytes we want to subtract this offset again. */
13858 src_autoinc = dst_autoinc = 0;
13859
13860 for (i = 0; i < interleave_factor; i++)
13861 regnos[i] = i;
13862
13863 /* Copy BLOCK_SIZE_BYTES chunks. */
13864
13865 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13866 {
13867 /* Load words. */
13868 if (src_aligned && interleave_factor > 1)
13869 {
13870 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13871 TRUE, srcbase, &srcoffset));
13872 src_autoinc += UNITS_PER_WORD * interleave_factor;
13873 }
13874 else
13875 {
13876 for (j = 0; j < interleave_factor; j++)
13877 {
13878 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13879 - src_autoinc));
13880 mem = adjust_automodify_address (srcbase, SImode, addr,
13881 srcoffset + j * UNITS_PER_WORD);
13882 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13883 }
13884 srcoffset += block_size_bytes;
13885 }
13886
13887 /* Store words. */
13888 if (dst_aligned && interleave_factor > 1)
13889 {
13890 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13891 TRUE, dstbase, &dstoffset));
13892 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13893 }
13894 else
13895 {
13896 for (j = 0; j < interleave_factor; j++)
13897 {
13898 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13899 - dst_autoinc));
13900 mem = adjust_automodify_address (dstbase, SImode, addr,
13901 dstoffset + j * UNITS_PER_WORD);
13902 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13903 }
13904 dstoffset += block_size_bytes;
13905 }
13906
13907 remaining -= block_size_bytes;
13908 }
13909
13910 /* Copy any whole words left (note these aren't interleaved with any
13911 subsequent halfword/byte load/stores in the interests of simplicity). */
13912
13913 words = remaining / UNITS_PER_WORD;
13914
13915 gcc_assert (words < interleave_factor);
13916
13917 if (src_aligned && words > 1)
13918 {
13919 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13920 &srcoffset));
13921 src_autoinc += UNITS_PER_WORD * words;
13922 }
13923 else
13924 {
13925 for (j = 0; j < words; j++)
13926 {
13927 addr = plus_constant (Pmode, src,
13928 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13929 mem = adjust_automodify_address (srcbase, SImode, addr,
13930 srcoffset + j * UNITS_PER_WORD);
13931 if (src_aligned)
13932 emit_move_insn (regs[j], mem);
13933 else
13934 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13935 }
13936 srcoffset += words * UNITS_PER_WORD;
13937 }
13938
13939 if (dst_aligned && words > 1)
13940 {
13941 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13942 &dstoffset));
13943 dst_autoinc += words * UNITS_PER_WORD;
13944 }
13945 else
13946 {
13947 for (j = 0; j < words; j++)
13948 {
13949 addr = plus_constant (Pmode, dst,
13950 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13951 mem = adjust_automodify_address (dstbase, SImode, addr,
13952 dstoffset + j * UNITS_PER_WORD);
13953 if (dst_aligned)
13954 emit_move_insn (mem, regs[j]);
13955 else
13956 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13957 }
13958 dstoffset += words * UNITS_PER_WORD;
13959 }
13960
13961 remaining -= words * UNITS_PER_WORD;
13962
13963 gcc_assert (remaining < 4);
13964
13965 /* Copy a halfword if necessary. */
13966
13967 if (remaining >= 2)
13968 {
13969 halfword_tmp = gen_reg_rtx (SImode);
13970
13971 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13972 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13973 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13974
13975 /* Either write out immediately, or delay until we've loaded the last
13976 byte, depending on interleave factor. */
13977 if (interleave_factor == 1)
13978 {
13979 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13980 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13981 emit_insn (gen_unaligned_storehi (mem,
13982 gen_lowpart (HImode, halfword_tmp)));
13983 halfword_tmp = NULL;
13984 dstoffset += 2;
13985 }
13986
13987 remaining -= 2;
13988 srcoffset += 2;
13989 }
13990
13991 gcc_assert (remaining < 2);
13992
13993 /* Copy last byte. */
13994
13995 if ((remaining & 1) != 0)
13996 {
13997 byte_tmp = gen_reg_rtx (SImode);
13998
13999 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14000 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14001 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14002
14003 if (interleave_factor == 1)
14004 {
14005 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14006 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14007 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14008 byte_tmp = NULL;
14009 dstoffset++;
14010 }
14011
14012 remaining--;
14013 srcoffset++;
14014 }
14015
14016 /* Store last halfword if we haven't done so already. */
14017
14018 if (halfword_tmp)
14019 {
14020 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14021 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14022 emit_insn (gen_unaligned_storehi (mem,
14023 gen_lowpart (HImode, halfword_tmp)));
14024 dstoffset += 2;
14025 }
14026
14027 /* Likewise for last byte. */
14028
14029 if (byte_tmp)
14030 {
14031 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14032 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14033 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14034 dstoffset++;
14035 }
14036
14037 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14038 }
14039
14040 /* From mips_adjust_block_mem:
14041
14042 Helper function for doing a loop-based block operation on memory
14043 reference MEM. Each iteration of the loop will operate on LENGTH
14044 bytes of MEM.
14045
14046 Create a new base register for use within the loop and point it to
14047 the start of MEM. Create a new memory reference that uses this
14048 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14049
14050 static void
14051 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14052 rtx *loop_mem)
14053 {
14054 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14055
14056 /* Although the new mem does not refer to a known location,
14057 it does keep up to LENGTH bytes of alignment. */
14058 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14059 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14060 }
14061
14062 /* From mips_block_move_loop:
14063
14064 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14065 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14066 the memory regions do not overlap. */
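/* The generated structure is roughly:
src_reg = &src; dest_reg = &dest; final_src = src_reg + (LENGTH - leftover);
loop: copy BYTES_PER_ITER bytes;
src_reg += BYTES_PER_ITER; dest_reg += BYTES_PER_ITER;
if (src_reg != final_src) goto loop;
and any leftover bytes are then copied with a straight-line sequence. */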
14067
14068 static void
14069 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14070 unsigned int interleave_factor,
14071 HOST_WIDE_INT bytes_per_iter)
14072 {
14073 rtx src_reg, dest_reg, final_src, test;
14074 HOST_WIDE_INT leftover;
14075
14076 leftover = length % bytes_per_iter;
14077 length -= leftover;
14078
14079 /* Create registers and memory references for use within the loop. */
14080 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14081 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14082
14083 /* Calculate the value that SRC_REG should have after the last iteration of
14084 the loop. */
14085 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14086 0, 0, OPTAB_WIDEN);
14087
14088 /* Emit the start of the loop. */
14089 rtx_code_label *label = gen_label_rtx ();
14090 emit_label (label);
14091
14092 /* Emit the loop body. */
14093 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14094 interleave_factor);
14095
14096 /* Move on to the next block. */
14097 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14098 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14099
14100 /* Emit the loop condition. */
14101 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14102 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14103
14104 /* Mop up any left-over bytes. */
14105 if (leftover)
14106 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14107 }
14108
14109 /* Emit a block move when either the source or destination is unaligned (not
14110 aligned to a four-byte boundary). This may need further tuning depending on
14111 core type, optimize_size setting, etc. */
14112
14113 static int
14114 arm_movmemqi_unaligned (rtx *operands)
14115 {
14116 HOST_WIDE_INT length = INTVAL (operands[2]);
14117
14118 if (optimize_size)
14119 {
14120 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14121 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14122 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14123 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14124 or dst_aligned though: allow more interleaving in those cases since the
14125 resulting code can be smaller. */
14126 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14127 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14128
14129 if (length > 12)
14130 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14131 interleave_factor, bytes_per_iter);
14132 else
14133 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14134 interleave_factor);
14135 }
14136 else
14137 {
14138 /* Note that the loop created by arm_block_move_unaligned_loop may be
14139 subject to loop unrolling, which makes tuning this condition a little
14140 redundant. */
14141 if (length > 32)
14142 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14143 else
14144 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14145 }
14146
14147 return 1;
14148 }
14149
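/* Expand a block copy of a constant size of at most 64 bytes (operands as
for the movmemqi pattern). Returns 1 if the copy was expanded inline,
0 if the caller should fall back to the generic code. */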
14150 int
14151 arm_gen_movmemqi (rtx *operands)
14152 {
14153 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14154 HOST_WIDE_INT srcoffset, dstoffset;
14155 rtx src, dst, srcbase, dstbase;
14156 rtx part_bytes_reg = NULL;
14157 rtx mem;
14158
14159 if (!CONST_INT_P (operands[2])
14160 || !CONST_INT_P (operands[3])
14161 || INTVAL (operands[2]) > 64)
14162 return 0;
14163
14164 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14165 return arm_movmemqi_unaligned (operands);
14166
14167 if (INTVAL (operands[3]) & 3)
14168 return 0;
14169
14170 dstbase = operands[0];
14171 srcbase = operands[1];
14172
14173 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14174 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14175
14176 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14177 out_words_to_go = INTVAL (operands[2]) / 4;
14178 last_bytes = INTVAL (operands[2]) & 3;
14179 dstoffset = srcoffset = 0;
14180
14181 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14182 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14183
14184 while (in_words_to_go >= 2)
14185 {
14186 if (in_words_to_go > 4)
14187 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14188 TRUE, srcbase, &srcoffset));
14189 else
14190 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14191 src, FALSE, srcbase,
14192 &srcoffset));
14193
14194 if (out_words_to_go)
14195 {
14196 if (out_words_to_go > 4)
14197 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14198 TRUE, dstbase, &dstoffset));
14199 else if (out_words_to_go != 1)
14200 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14201 out_words_to_go, dst,
14202 (last_bytes == 0
14203 ? FALSE : TRUE),
14204 dstbase, &dstoffset));
14205 else
14206 {
14207 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14208 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14209 if (last_bytes != 0)
14210 {
14211 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14212 dstoffset += 4;
14213 }
14214 }
14215 }
14216
14217 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14218 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14219 }
14220
14221 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14222 if (out_words_to_go)
14223 {
14224 rtx sreg;
14225
14226 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14227 sreg = copy_to_reg (mem);
14228
14229 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14230 emit_move_insn (mem, sreg);
14231 in_words_to_go--;
14232
14233 gcc_assert (!in_words_to_go); /* Sanity check */
14234 }
14235
14236 if (in_words_to_go)
14237 {
14238 gcc_assert (in_words_to_go > 0);
14239
14240 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14241 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14242 }
14243
14244 gcc_assert (!last_bytes || part_bytes_reg);
14245
14246 if (BYTES_BIG_ENDIAN && last_bytes)
14247 {
14248 rtx tmp = gen_reg_rtx (SImode);
14249
14250 /* The bytes we want are in the top end of the word. */
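/* For example, with LAST_BYTES == 2 the two wanted bytes sit in bits 31:16
   of the loaded word; shifting right by 16 brings them to the bottom, so
   the low byte (the second source byte) is stored at DST + 1, the register
   is shifted right another 8 bits, and the first source byte goes to DST. */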
14251 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14252 GEN_INT (8 * (4 - last_bytes))));
14253 part_bytes_reg = tmp;
14254
14255 while (last_bytes)
14256 {
14257 mem = adjust_automodify_address (dstbase, QImode,
14258 plus_constant (Pmode, dst,
14259 last_bytes - 1),
14260 dstoffset + last_bytes - 1);
14261 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14262
14263 if (--last_bytes)
14264 {
14265 tmp = gen_reg_rtx (SImode);
14266 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14267 part_bytes_reg = tmp;
14268 }
14269 }
14270
14271 }
14272 else
14273 {
14274 if (last_bytes > 1)
14275 {
14276 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14277 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14278 last_bytes -= 2;
14279 if (last_bytes)
14280 {
14281 rtx tmp = gen_reg_rtx (SImode);
14282 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14283 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14284 part_bytes_reg = tmp;
14285 dstoffset += 2;
14286 }
14287 }
14288
14289 if (last_bytes)
14290 {
14291 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14292 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14293 }
14294 }
14295
14296 return 1;
14297 }
14298
14299 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14300 by mode size. */
14301 inline static rtx
14302 next_consecutive_mem (rtx mem)
14303 {
14304 machine_mode mode = GET_MODE (mem);
14305 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14306 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14307
14308 return adjust_automodify_address (mem, mode, addr, offset);
14309 }
14310
14311 /* Copy using LDRD/STRD instructions whenever possible.
14312 Returns true upon success. */
14313 bool
14314 gen_movmem_ldrd_strd (rtx *operands)
14315 {
14316 unsigned HOST_WIDE_INT len;
14317 HOST_WIDE_INT align;
14318 rtx src, dst, base;
14319 rtx reg0;
14320 bool src_aligned, dst_aligned;
14321 bool src_volatile, dst_volatile;
14322
14323 gcc_assert (CONST_INT_P (operands[2]));
14324 gcc_assert (CONST_INT_P (operands[3]));
14325
14326 len = UINTVAL (operands[2]);
14327 if (len > 64)
14328 return false;
14329
14330 /* Maximum alignment we can assume for both src and dst buffers. */
14331 align = INTVAL (operands[3]);
14332
14333 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14334 return false;
14335
14336 /* Place src and dst addresses in registers
14337 and update the corresponding mem rtx. */
14338 dst = operands[0];
14339 dst_volatile = MEM_VOLATILE_P (dst);
14340 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14341 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14342 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14343
14344 src = operands[1];
14345 src_volatile = MEM_VOLATILE_P (src);
14346 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14347 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14348 src = adjust_automodify_address (src, VOIDmode, base, 0);
14349
14350 if (!unaligned_access && !(src_aligned && dst_aligned))
14351 return false;
14352
14353 if (src_volatile || dst_volatile)
14354 return false;
14355
14356 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14357 if (!(dst_aligned || src_aligned))
14358 return arm_gen_movmemqi (operands);
14359
14360 /* If either src or dst is unaligned, we'll access it as pairs
14361 of unaligned SImode accesses. Otherwise we can generate DImode
14362 ldrd/strd instructions. */
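/* A rough sketch of one 8-byte iteration when only the destination is
   word-aligned (register names are illustrative only):
	ldr	rlo, [src]	@ unaligned word load
	ldr	rhi, [src, #4]	@ unaligned word load
	strd	rlo, rhi, [dst]
   When both sides are aligned the two loads become a single ldrd. */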
14363 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14364 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14365
14366 while (len >= 8)
14367 {
14368 len -= 8;
14369 reg0 = gen_reg_rtx (DImode);
14370 rtx low_reg = NULL_RTX;
14371 rtx hi_reg = NULL_RTX;
14372
14373 if (!src_aligned || !dst_aligned)
14374 {
14375 low_reg = gen_lowpart (SImode, reg0);
14376 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14377 }
14378 if (src_aligned)
14379 emit_move_insn (reg0, src);
14380 else
14381 {
14382 emit_insn (gen_unaligned_loadsi (low_reg, src));
14383 src = next_consecutive_mem (src);
14384 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14385 }
14386
14387 if (dst_aligned)
14388 emit_move_insn (dst, reg0);
14389 else
14390 {
14391 emit_insn (gen_unaligned_storesi (dst, low_reg));
14392 dst = next_consecutive_mem (dst);
14393 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14394 }
14395
14396 src = next_consecutive_mem (src);
14397 dst = next_consecutive_mem (dst);
14398 }
14399
14400 gcc_assert (len < 8);
14401 if (len >= 4)
14402 {
14403 /* More than a word but less than a double-word to copy. Copy a word. */
14404 reg0 = gen_reg_rtx (SImode);
14405 src = adjust_address (src, SImode, 0);
14406 dst = adjust_address (dst, SImode, 0);
14407 if (src_aligned)
14408 emit_move_insn (reg0, src);
14409 else
14410 emit_insn (gen_unaligned_loadsi (reg0, src));
14411
14412 if (dst_aligned)
14413 emit_move_insn (dst, reg0);
14414 else
14415 emit_insn (gen_unaligned_storesi (dst, reg0));
14416
14417 src = next_consecutive_mem (src);
14418 dst = next_consecutive_mem (dst);
14419 len -= 4;
14420 }
14421
14422 if (len == 0)
14423 return true;
14424
14425 /* Copy the remaining bytes. */
14426 if (len >= 2)
14427 {
14428 dst = adjust_address (dst, HImode, 0);
14429 src = adjust_address (src, HImode, 0);
14430 reg0 = gen_reg_rtx (SImode);
14431 if (src_aligned)
14432 emit_insn (gen_zero_extendhisi2 (reg0, src));
14433 else
14434 emit_insn (gen_unaligned_loadhiu (reg0, src));
14435
14436 if (dst_aligned)
14437 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14438 else
14439 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14440
14441 src = next_consecutive_mem (src);
14442 dst = next_consecutive_mem (dst);
14443 if (len == 2)
14444 return true;
14445 }
14446
14447 dst = adjust_address (dst, QImode, 0);
14448 src = adjust_address (src, QImode, 0);
14449 reg0 = gen_reg_rtx (QImode);
14450 emit_move_insn (reg0, src);
14451 emit_move_insn (dst, reg0);
14452 return true;
14453 }
14454
14455 /* Select a dominance comparison mode if possible for a test of the general
14456 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14457 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14458 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14459 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14460 In all cases OP will be either EQ or NE, but we don't need to know which
14461 here. If we are unable to support a dominance comparison we return
14462 CC mode. This will then fail to match for the RTL expressions that
14463 generate this call. */
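/* For example (illustrative), a test such as
     (ne (ior (eq x y) (geu a b)) (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y yields CC_DGEUmode, since EQ dominates GEU. */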
14464 machine_mode
14465 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14466 {
14467 enum rtx_code cond1, cond2;
14468 int swapped = 0;
14469
14470 /* Currently we will probably get the wrong result if the individual
14471 comparisons are not simple. This also ensures that it is safe to
14472 reverse a comparison if necessary. */
14473 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14474 != CCmode)
14475 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14476 != CCmode))
14477 return CCmode;
14478
14479 /* The if_then_else variant of this tests the second condition if the
14480 first passes, but is true if the first fails. Reverse the first
14481 condition to get a true "inclusive-or" expression. */
14482 if (cond_or == DOM_CC_NX_OR_Y)
14483 cond1 = reverse_condition (cond1);
14484
14485 /* If the comparisons are not equal, and one doesn't dominate the other,
14486 then we can't do this. */
14487 if (cond1 != cond2
14488 && !comparison_dominates_p (cond1, cond2)
14489 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14490 return CCmode;
14491
14492 if (swapped)
14493 std::swap (cond1, cond2);
14494
14495 switch (cond1)
14496 {
14497 case EQ:
14498 if (cond_or == DOM_CC_X_AND_Y)
14499 return CC_DEQmode;
14500
14501 switch (cond2)
14502 {
14503 case EQ: return CC_DEQmode;
14504 case LE: return CC_DLEmode;
14505 case LEU: return CC_DLEUmode;
14506 case GE: return CC_DGEmode;
14507 case GEU: return CC_DGEUmode;
14508 default: gcc_unreachable ();
14509 }
14510
14511 case LT:
14512 if (cond_or == DOM_CC_X_AND_Y)
14513 return CC_DLTmode;
14514
14515 switch (cond2)
14516 {
14517 case LT:
14518 return CC_DLTmode;
14519 case LE:
14520 return CC_DLEmode;
14521 case NE:
14522 return CC_DNEmode;
14523 default:
14524 gcc_unreachable ();
14525 }
14526
14527 case GT:
14528 if (cond_or == DOM_CC_X_AND_Y)
14529 return CC_DGTmode;
14530
14531 switch (cond2)
14532 {
14533 case GT:
14534 return CC_DGTmode;
14535 case GE:
14536 return CC_DGEmode;
14537 case NE:
14538 return CC_DNEmode;
14539 default:
14540 gcc_unreachable ();
14541 }
14542
14543 case LTU:
14544 if (cond_or == DOM_CC_X_AND_Y)
14545 return CC_DLTUmode;
14546
14547 switch (cond2)
14548 {
14549 case LTU:
14550 return CC_DLTUmode;
14551 case LEU:
14552 return CC_DLEUmode;
14553 case NE:
14554 return CC_DNEmode;
14555 default:
14556 gcc_unreachable ();
14557 }
14558
14559 case GTU:
14560 if (cond_or == DOM_CC_X_AND_Y)
14561 return CC_DGTUmode;
14562
14563 switch (cond2)
14564 {
14565 case GTU:
14566 return CC_DGTUmode;
14567 case GEU:
14568 return CC_DGEUmode;
14569 case NE:
14570 return CC_DNEmode;
14571 default:
14572 gcc_unreachable ();
14573 }
14574
14575 /* The remaining cases only occur when both comparisons are the
14576 same. */
14577 case NE:
14578 gcc_assert (cond1 == cond2);
14579 return CC_DNEmode;
14580
14581 case LE:
14582 gcc_assert (cond1 == cond2);
14583 return CC_DLEmode;
14584
14585 case GE:
14586 gcc_assert (cond1 == cond2);
14587 return CC_DGEmode;
14588
14589 case LEU:
14590 gcc_assert (cond1 == cond2);
14591 return CC_DLEUmode;
14592
14593 case GEU:
14594 gcc_assert (cond1 == cond2);
14595 return CC_DGEUmode;
14596
14597 default:
14598 gcc_unreachable ();
14599 }
14600 }
14601
14602 machine_mode
14603 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14604 {
14605 /* All floating point compares return CCFP if it is an equality
14606 comparison, and CCFPE otherwise. */
14607 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14608 {
14609 switch (op)
14610 {
14611 case EQ:
14612 case NE:
14613 case UNORDERED:
14614 case ORDERED:
14615 case UNLT:
14616 case UNLE:
14617 case UNGT:
14618 case UNGE:
14619 case UNEQ:
14620 case LTGT:
14621 return CCFPmode;
14622
14623 case LT:
14624 case LE:
14625 case GT:
14626 case GE:
14627 return CCFPEmode;
14628
14629 default:
14630 gcc_unreachable ();
14631 }
14632 }
14633
14634 /* A compare with a shifted operand. Because of canonicalization, the
14635 comparison will have to be swapped when we emit the assembler. */
14636 if (GET_MODE (y) == SImode
14637 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14638 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14639 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14640 || GET_CODE (x) == ROTATERT))
14641 return CC_SWPmode;
14642
14643 /* This operation is performed swapped, but since we only rely on the Z
14644 flag we don't need an additional mode. */
14645 if (GET_MODE (y) == SImode
14646 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14647 && GET_CODE (x) == NEG
14648 && (op == EQ || op == NE))
14649 return CC_Zmode;
14650
14651 /* This is a special case that is used by combine to allow a
14652 comparison of a shifted byte load to be split into a zero-extend
14653 followed by a comparison of the shifted integer (only valid for
14654 equalities and unsigned inequalities). */
14655 if (GET_MODE (x) == SImode
14656 && GET_CODE (x) == ASHIFT
14657 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14658 && GET_CODE (XEXP (x, 0)) == SUBREG
14659 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14660 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14661 && (op == EQ || op == NE
14662 || op == GEU || op == GTU || op == LTU || op == LEU)
14663 && CONST_INT_P (y))
14664 return CC_Zmode;
14665
14666 /* A construct for a conditional compare: if the false arm contains
14667 0, then both conditions must be true; otherwise either condition
14668 must be true. Not all conditions are possible, so CCmode is
14669 returned if it can't be done. */
14670 if (GET_CODE (x) == IF_THEN_ELSE
14671 && (XEXP (x, 2) == const0_rtx
14672 || XEXP (x, 2) == const1_rtx)
14673 && COMPARISON_P (XEXP (x, 0))
14674 && COMPARISON_P (XEXP (x, 1)))
14675 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14676 INTVAL (XEXP (x, 2)));
14677
14678 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14679 if (GET_CODE (x) == AND
14680 && (op == EQ || op == NE)
14681 && COMPARISON_P (XEXP (x, 0))
14682 && COMPARISON_P (XEXP (x, 1)))
14683 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14684 DOM_CC_X_AND_Y);
14685
14686 if (GET_CODE (x) == IOR
14687 && (op == EQ || op == NE)
14688 && COMPARISON_P (XEXP (x, 0))
14689 && COMPARISON_P (XEXP (x, 1)))
14690 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14691 DOM_CC_X_OR_Y);
14692
14693 /* An operation (on Thumb) where we want to test for a single bit.
14694 This is done by shifting that bit up into the top bit of a
14695 scratch register; we can then branch on the sign bit. */
14696 if (TARGET_THUMB1
14697 && GET_MODE (x) == SImode
14698 && (op == EQ || op == NE)
14699 && GET_CODE (x) == ZERO_EXTRACT
14700 && XEXP (x, 1) == const1_rtx)
14701 return CC_Nmode;
14702
14703 /* An operation that sets the condition codes as a side effect; the
14704 V flag is not set correctly, so we can only use comparisons where
14705 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14706 instead.) */
14707 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14708 if (GET_MODE (x) == SImode
14709 && y == const0_rtx
14710 && (op == EQ || op == NE || op == LT || op == GE)
14711 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14712 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14713 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14714 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14715 || GET_CODE (x) == LSHIFTRT
14716 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14717 || GET_CODE (x) == ROTATERT
14718 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14719 return CC_NOOVmode;
14720
14721 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14722 return CC_Zmode;
14723
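/* For example, an unsigned overflow check of the form (ltu (plus x y) x)
   only needs the carry flag, so the test below maps it to CC_Cmode. */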
14724 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14725 && GET_CODE (x) == PLUS
14726 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14727 return CC_Cmode;
14728
14729 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14730 {
14731 switch (op)
14732 {
14733 case EQ:
14734 case NE:
14735 /* A DImode comparison against zero can be implemented by
14736 or'ing the two halves together. */
14737 if (y == const0_rtx)
14738 return CC_Zmode;
14739
14740 /* We can do an equality test in three Thumb instructions. */
14741 if (!TARGET_32BIT)
14742 return CC_Zmode;
14743
14744 /* FALLTHROUGH */
14745
14746 case LTU:
14747 case LEU:
14748 case GTU:
14749 case GEU:
14750 /* DImode unsigned comparisons can be implemented by cmp +
14751 cmpeq without a scratch register. Not worth doing in
14752 Thumb-2. */
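/* For example (sketch only): comparing the pairs xhi:xlo and yhi:ylo
   becomes roughly
	cmp	xhi, yhi
	cmpeq	xlo, ylo
   followed by a branch on the appropriate unsigned condition. */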
14753 if (TARGET_32BIT)
14754 return CC_CZmode;
14755
14756 /* FALLTHROUGH */
14757
14758 case LT:
14759 case LE:
14760 case GT:
14761 case GE:
14762 /* DImode signed and unsigned comparisons can be implemented
14763 by cmp + sbcs with a scratch register, but that does not
14764 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14765 gcc_assert (op != EQ && op != NE);
14766 return CC_NCVmode;
14767
14768 default:
14769 gcc_unreachable ();
14770 }
14771 }
14772
14773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14774 return GET_MODE (x);
14775
14776 return CCmode;
14777 }
14778
14779 /* X and Y are two things to compare using CODE. Emit the compare insn and
14780 return the rtx for register 0 in the proper mode. FP means this is a
14781 floating point compare: I don't think that it is needed on the arm. */
14782 rtx
14783 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14784 {
14785 machine_mode mode;
14786 rtx cc_reg;
14787 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14788
14789 /* We might have X as a constant, Y as a register because of the predicates
14790 used for cmpdi. If so, force X to a register here. */
14791 if (dimode_comparison && !REG_P (x))
14792 x = force_reg (DImode, x);
14793
14794 mode = SELECT_CC_MODE (code, x, y);
14795 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14796
14797 if (dimode_comparison
14798 && mode != CC_CZmode)
14799 {
14800 rtx clobber, set;
14801
14802 /* To compare two non-zero values for equality, XOR them and
14803 then compare against zero. Not used for ARM mode; there
14804 CC_CZmode is cheaper. */
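/* Roughly: two EORs of the word halves followed by an ORRS of the
   results, so that the Z flag ends up set iff the two values are equal. */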
14805 if (mode == CC_Zmode && y != const0_rtx)
14806 {
14807 gcc_assert (!reload_completed);
14808 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14809 y = const0_rtx;
14810 }
14811
14812 /* A scratch register is required. */
14813 if (reload_completed)
14814 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14815 else
14816 scratch = gen_rtx_SCRATCH (SImode);
14817
14818 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14819 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14820 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14821 }
14822 else
14823 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14824
14825 return cc_reg;
14826 }
14827
14828 /* Generate a sequence of insns that computes the correct return
14829 address mask for the physical architecture that the program is
14830 running on. */
14831 rtx
14832 arm_gen_return_addr_mask (void)
14833 {
14834 rtx reg = gen_reg_rtx (Pmode);
14835
14836 emit_insn (gen_return_addr_mask (reg));
14837 return reg;
14838 }
14839
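/* Handle loading a half-word from memory during reload by synthesizing it as
   two zero-extended byte loads combined with a shift and an OR (the byte
   order depends on endianness). A little-endian sketch, with SCRATCH taken
   from the DImode register in operands[2]:
	ldrb	scratch, [base, #offset]
	ldrb	dest, [base, #offset + 1]
	orr	dest, scratch, dest, lsl #8  */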
14840 void
14841 arm_reload_in_hi (rtx *operands)
14842 {
14843 rtx ref = operands[1];
14844 rtx base, scratch;
14845 HOST_WIDE_INT offset = 0;
14846
14847 if (GET_CODE (ref) == SUBREG)
14848 {
14849 offset = SUBREG_BYTE (ref);
14850 ref = SUBREG_REG (ref);
14851 }
14852
14853 if (REG_P (ref))
14854 {
14855 /* We have a pseudo which has been spilt onto the stack; there
14856 are two cases here: the first where there is a simple
14857 stack-slot replacement and a second where the stack-slot is
14858 out of range, or is used as a subreg. */
14859 if (reg_equiv_mem (REGNO (ref)))
14860 {
14861 ref = reg_equiv_mem (REGNO (ref));
14862 base = find_replacement (&XEXP (ref, 0));
14863 }
14864 else
14865 /* The slot is out of range, or was dressed up in a SUBREG. */
14866 base = reg_equiv_address (REGNO (ref));
14867
14868 /* PR 62554: If there is no equivalent memory location then just move
14869 the value as an SImode register move. This happens when the target
14870 architecture variant does not have an HImode register move. */
14871 if (base == NULL)
14872 {
14873 gcc_assert (REG_P (operands[0]));
14874 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14875 gen_rtx_SUBREG (SImode, ref, 0)));
14876 return;
14877 }
14878 }
14879 else
14880 base = find_replacement (&XEXP (ref, 0));
14881
14882 /* Handle the case where the address is too complex to be offset by 1. */
14883 if (GET_CODE (base) == MINUS
14884 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14885 {
14886 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14887
14888 emit_set_insn (base_plus, base);
14889 base = base_plus;
14890 }
14891 else if (GET_CODE (base) == PLUS)
14892 {
14893 /* The addend must be CONST_INT, or we would have dealt with it above. */
14894 HOST_WIDE_INT hi, lo;
14895
14896 offset += INTVAL (XEXP (base, 1));
14897 base = XEXP (base, 0);
14898
14899 /* Rework the address into a legal sequence of insns. */
14900 /* Valid range for lo is -4095 -> 4095 */
14901 lo = (offset >= 0
14902 ? (offset & 0xfff)
14903 : -((-offset) & 0xfff));
14904
14905 /* Corner case, if lo is the max offset then we would be out of range
14906 once we have added the additional 1 below, so bump the msb into the
14907 pre-loading insn(s). */
14908 if (lo == 4095)
14909 lo &= 0x7ff;
14910
14911 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14912 ^ (HOST_WIDE_INT) 0x80000000)
14913 - (HOST_WIDE_INT) 0x80000000);
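/* For example: offset 0x1234 splits into hi = 0x1000 and lo = 0x234;
   offset 4095 would put the byte at offset + 1 out of range, so lo is
   reduced to 2047 and hi absorbs the remaining 2048. */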
14914
14915 gcc_assert (hi + lo == offset);
14916
14917 if (hi != 0)
14918 {
14919 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14920
14921 /* Get the base address; addsi3 knows how to handle constants
14922 that require more than one insn. */
14923 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14924 base = base_plus;
14925 offset = lo;
14926 }
14927 }
14928
14929 /* Operands[2] may overlap operands[0] (though it won't overlap
14930 operands[1]), that's why we asked for a DImode reg -- so we can
14931 use the half that does not overlap. */
14932 if (REGNO (operands[2]) == REGNO (operands[0]))
14933 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14934 else
14935 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14936
14937 emit_insn (gen_zero_extendqisi2 (scratch,
14938 gen_rtx_MEM (QImode,
14939 plus_constant (Pmode, base,
14940 offset))));
14941 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14942 gen_rtx_MEM (QImode,
14943 plus_constant (Pmode, base,
14944 offset + 1))));
14945 if (!BYTES_BIG_ENDIAN)
14946 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14947 gen_rtx_IOR (SImode,
14948 gen_rtx_ASHIFT
14949 (SImode,
14950 gen_rtx_SUBREG (SImode, operands[0], 0),
14951 GEN_INT (8)),
14952 scratch));
14953 else
14954 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14955 gen_rtx_IOR (SImode,
14956 gen_rtx_ASHIFT (SImode, scratch,
14957 GEN_INT (8)),
14958 gen_rtx_SUBREG (SImode, operands[0], 0)));
14959 }
14960
14961 /* Handle storing a half-word to memory during reload by synthesizing as two
14962 byte stores. Take care not to clobber the input values until after we
14963 have moved them somewhere safe. This code assumes that if the DImode
14964 scratch in operands[2] overlaps either the input value or output address
14965 in some way, then that value must die in this insn (we absolutely need
14966 two scratch registers for some corner cases). */
14967 void
14968 arm_reload_out_hi (rtx *operands)
14969 {
14970 rtx ref = operands[0];
14971 rtx outval = operands[1];
14972 rtx base, scratch;
14973 HOST_WIDE_INT offset = 0;
14974
14975 if (GET_CODE (ref) == SUBREG)
14976 {
14977 offset = SUBREG_BYTE (ref);
14978 ref = SUBREG_REG (ref);
14979 }
14980
14981 if (REG_P (ref))
14982 {
14983 /* We have a pseudo which has been spilt onto the stack; there
14984 are two cases here: the first where there is a simple
14985 stack-slot replacement and a second where the stack-slot is
14986 out of range, or is used as a subreg. */
14987 if (reg_equiv_mem (REGNO (ref)))
14988 {
14989 ref = reg_equiv_mem (REGNO (ref));
14990 base = find_replacement (&XEXP (ref, 0));
14991 }
14992 else
14993 /* The slot is out of range, or was dressed up in a SUBREG. */
14994 base = reg_equiv_address (REGNO (ref));
14995
14996 /* PR 62254: If there is no equivalent memory location then just move
14997 the value as an SImode register move. This happens when the target
14998 architecture variant does not have an HImode register move. */
14999 if (base == NULL)
15000 {
15001 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15002
15003 if (REG_P (outval))
15004 {
15005 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15006 gen_rtx_SUBREG (SImode, outval, 0)));
15007 }
15008 else /* SUBREG_P (outval) */
15009 {
15010 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15011 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15012 SUBREG_REG (outval)));
15013 else
15014 /* FIXME: Handle other cases ? */
15015 gcc_unreachable ();
15016 }
15017 return;
15018 }
15019 }
15020 else
15021 base = find_replacement (&XEXP (ref, 0));
15022
15023 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15024
15025 /* Handle the case where the address is too complex to be offset by 1. */
15026 if (GET_CODE (base) == MINUS
15027 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15028 {
15029 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15030
15031 /* Be careful not to destroy OUTVAL. */
15032 if (reg_overlap_mentioned_p (base_plus, outval))
15033 {
15034 /* Updating base_plus might destroy outval, see if we can
15035 swap the scratch and base_plus. */
15036 if (!reg_overlap_mentioned_p (scratch, outval))
15037 std::swap (scratch, base_plus);
15038 else
15039 {
15040 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15041
15042 /* Be conservative and copy OUTVAL into the scratch now,
15043 this should only be necessary if outval is a subreg
15044 of something larger than a word. */
15045 /* XXX Might this clobber base? I can't see how it can,
15046 since scratch is known to overlap with OUTVAL, and
15047 must be wider than a word. */
15048 emit_insn (gen_movhi (scratch_hi, outval));
15049 outval = scratch_hi;
15050 }
15051 }
15052
15053 emit_set_insn (base_plus, base);
15054 base = base_plus;
15055 }
15056 else if (GET_CODE (base) == PLUS)
15057 {
15058 /* The addend must be CONST_INT, or we would have dealt with it above. */
15059 HOST_WIDE_INT hi, lo;
15060
15061 offset += INTVAL (XEXP (base, 1));
15062 base = XEXP (base, 0);
15063
15064 /* Rework the address into a legal sequence of insns. */
15065 /* Valid range for lo is -4095 -> 4095 */
15066 lo = (offset >= 0
15067 ? (offset & 0xfff)
15068 : -((-offset) & 0xfff));
15069
15070 /* Corner case, if lo is the max offset then we would be out of range
15071 once we have added the additional 1 below, so bump the msb into the
15072 pre-loading insn(s). */
15073 if (lo == 4095)
15074 lo &= 0x7ff;
15075
15076 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15077 ^ (HOST_WIDE_INT) 0x80000000)
15078 - (HOST_WIDE_INT) 0x80000000);
15079
15080 gcc_assert (hi + lo == offset);
15081
15082 if (hi != 0)
15083 {
15084 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15085
15086 /* Be careful not to destroy OUTVAL. */
15087 if (reg_overlap_mentioned_p (base_plus, outval))
15088 {
15089 /* Updating base_plus might destroy outval, see if we
15090 can swap the scratch and base_plus. */
15091 if (!reg_overlap_mentioned_p (scratch, outval))
15092 std::swap (scratch, base_plus);
15093 else
15094 {
15095 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15096
15097 /* Be conservative and copy outval into scratch now,
15098 this should only be necessary if outval is a
15099 subreg of something larger than a word. */
15100 /* XXX Might this clobber base? I can't see how it
15101 can, since scratch is known to overlap with
15102 outval. */
15103 emit_insn (gen_movhi (scratch_hi, outval));
15104 outval = scratch_hi;
15105 }
15106 }
15107
15108 /* Get the base address; addsi3 knows how to handle constants
15109 that require more than one insn. */
15110 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15111 base = base_plus;
15112 offset = lo;
15113 }
15114 }
15115
15116 if (BYTES_BIG_ENDIAN)
15117 {
15118 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15119 plus_constant (Pmode, base,
15120 offset + 1)),
15121 gen_lowpart (QImode, outval)));
15122 emit_insn (gen_lshrsi3 (scratch,
15123 gen_rtx_SUBREG (SImode, outval, 0),
15124 GEN_INT (8)));
15125 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15126 offset)),
15127 gen_lowpart (QImode, scratch)));
15128 }
15129 else
15130 {
15131 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15132 offset)),
15133 gen_lowpart (QImode, outval)));
15134 emit_insn (gen_lshrsi3 (scratch,
15135 gen_rtx_SUBREG (SImode, outval, 0),
15136 GEN_INT (8)));
15137 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15138 plus_constant (Pmode, base,
15139 offset + 1)),
15140 gen_lowpart (QImode, scratch)));
15141 }
15142 }
15143
15144 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15145 (padded to the size of a word) should be passed in a register. */
15146
15147 static bool
15148 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15149 {
15150 if (TARGET_AAPCS_BASED)
15151 return must_pass_in_stack_var_size (mode, type);
15152 else
15153 return must_pass_in_stack_var_size_or_pad (mode, type);
15154 }
15155
15156
15157 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15158 byte of a stack argument has useful data. For legacy APCS ABIs we use
15159 the default. For AAPCS based ABIs small aggregate types are placed
15160 at the lowest memory address. */
15161
15162 static pad_direction
15163 arm_function_arg_padding (machine_mode mode, const_tree type)
15164 {
15165 if (!TARGET_AAPCS_BASED)
15166 return default_function_arg_padding (mode, type);
15167
15168 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15169 return PAD_DOWNWARD;
15170
15171 return PAD_UPWARD;
15172 }
15173
15174
15175 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15176 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15177 register has useful data, and return the opposite if the most
15178 significant byte does. */
15179
15180 bool
15181 arm_pad_reg_upward (machine_mode mode,
15182 tree type, int first ATTRIBUTE_UNUSED)
15183 {
15184 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15185 {
15186 /* For AAPCS, small aggregates, small fixed-point types,
15187 and small complex types are always padded upwards. */
15188 if (type)
15189 {
15190 if ((AGGREGATE_TYPE_P (type)
15191 || TREE_CODE (type) == COMPLEX_TYPE
15192 || FIXED_POINT_TYPE_P (type))
15193 && int_size_in_bytes (type) <= 4)
15194 return true;
15195 }
15196 else
15197 {
15198 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15199 && GET_MODE_SIZE (mode) <= 4)
15200 return true;
15201 }
15202 }
15203
15204 /* Otherwise, use default padding. */
15205 return !BYTES_BIG_ENDIAN;
15206 }
15207
15208 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15209 assuming that the address in the base register is word aligned. */
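/* For example: an offset of 1020 is accepted in Thumb-2 but rejected in ARM
   state (limit 255); 248 is accepted in both; 2 is accepted only in ARM
   state, since Thumb-2 requires a multiple of 4. */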
15210 bool
15211 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15212 {
15213 HOST_WIDE_INT max_offset;
15214
15215 /* Offset must be a multiple of 4 in Thumb mode. */
15216 if (TARGET_THUMB2 && ((offset & 3) != 0))
15217 return false;
15218
15219 if (TARGET_THUMB2)
15220 max_offset = 1020;
15221 else if (TARGET_ARM)
15222 max_offset = 255;
15223 else
15224 return false;
15225
15226 return ((offset <= max_offset) && (offset >= -max_offset));
15227 }
15228
15229 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15230 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15231 Assumes that the address in the base register RN is word aligned. Pattern
15232 guarantees that both memory accesses use the same base register,
15233 that the offsets are constants within range, and that the gap between them is 4.
15234 If reload is complete, check that the registers are legal. WBACK indicates whether
15235 address is updated. LOAD indicates whether memory access is load or store. */
15236 bool
15237 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15238 bool wback, bool load)
15239 {
15240 unsigned int t, t2, n;
15241
15242 if (!reload_completed)
15243 return true;
15244
15245 if (!offset_ok_for_ldrd_strd (offset))
15246 return false;
15247
15248 t = REGNO (rt);
15249 t2 = REGNO (rt2);
15250 n = REGNO (rn);
15251
15252 if ((TARGET_THUMB2)
15253 && ((wback && (n == t || n == t2))
15254 || (t == SP_REGNUM)
15255 || (t == PC_REGNUM)
15256 || (t2 == SP_REGNUM)
15257 || (t2 == PC_REGNUM)
15258 || (!load && (n == PC_REGNUM))
15259 || (load && (t == t2))
15260 /* Triggers Cortex-M3 LDRD errata. */
15261 || (!wback && load && fix_cm3_ldrd && (n == t))))
15262 return false;
15263
15264 if ((TARGET_ARM)
15265 && ((wback && (n == t || n == t2))
15266 || (t2 == PC_REGNUM)
15267 || (t % 2 != 0) /* First destination register is not even. */
15268 || (t2 != t + 1)
15269 /* PC can be used as base register (for offset addressing only),
15270 but it is deprecated. */
15271 || (n == PC_REGNUM)))
15272 return false;
15273
15274 return true;
15275 }
15276
15277 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15278 operand MEM's address contains an immediate offset from the base
15279 register and has no side effects, in which case it sets BASE and
15280 OFFSET accordingly. */
15281 static bool
15282 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15283 {
15284 rtx addr;
15285
15286 gcc_assert (base != NULL && offset != NULL);
15287
15288 /* TODO: Handle more general memory operand patterns, such as
15289 PRE_DEC and PRE_INC. */
15290
15291 if (side_effects_p (mem))
15292 return false;
15293
15294 /* Can't deal with subregs. */
15295 if (GET_CODE (mem) == SUBREG)
15296 return false;
15297
15298 gcc_assert (MEM_P (mem));
15299
15300 *offset = const0_rtx;
15301
15302 addr = XEXP (mem, 0);
15303
15304 /* If addr isn't valid for DImode, then we can't handle it. */
15305 if (!arm_legitimate_address_p (DImode, addr,
15306 reload_in_progress || reload_completed))
15307 return false;
15308
15309 if (REG_P (addr))
15310 {
15311 *base = addr;
15312 return true;
15313 }
15314 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15315 {
15316 *base = XEXP (addr, 0);
15317 *offset = XEXP (addr, 1);
15318 return (REG_P (*base) && CONST_INT_P (*offset));
15319 }
15320
15321 return false;
15322 }
15323
15324 /* Called from a peephole2 to replace two word-size accesses with a
15325 single LDRD/STRD instruction. Returns true iff we can generate a
15326 new instruction sequence. That is, both accesses use the same base
15327 register and the gap between constant offsets is 4. This function
15328 may reorder its operands to match ldrd/strd RTL templates.
15329 OPERANDS are the operands found by the peephole matcher;
15330 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15331 corresponding memory operands. LOAD indicates whether the access
15332 is a load or a store. CONST_STORE indicates a store of constant
15333 integer values held in OPERANDS[4,5], and assumes that the pattern
15334 is four insns long, for the purpose of checking dead registers.
15335 COMMUTE indicates that register operands may be reordered. */
15336 bool
15337 gen_operands_ldrd_strd (rtx *operands, bool load,
15338 bool const_store, bool commute)
15339 {
15340 int nops = 2;
15341 HOST_WIDE_INT offsets[2], offset;
15342 rtx base = NULL_RTX;
15343 rtx cur_base, cur_offset, tmp;
15344 int i, gap;
15345 HARD_REG_SET regset;
15346
15347 gcc_assert (!const_store || !load);
15348 /* Check that the memory references are immediate offsets from the
15349 same base register. Extract the base register, the destination
15350 registers, and the corresponding memory offsets. */
15351 for (i = 0; i < nops; i++)
15352 {
15353 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15354 return false;
15355
15356 if (i == 0)
15357 base = cur_base;
15358 else if (REGNO (base) != REGNO (cur_base))
15359 return false;
15360
15361 offsets[i] = INTVAL (cur_offset);
15362 if (GET_CODE (operands[i]) == SUBREG)
15363 {
15364 tmp = SUBREG_REG (operands[i]);
15365 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15366 operands[i] = tmp;
15367 }
15368 }
15369
15370 /* Make sure there is no dependency between the individual loads. */
15371 if (load && REGNO (operands[0]) == REGNO (base))
15372 return false; /* RAW */
15373
15374 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15375 return false; /* WAW */
15376
15377 /* If the same input register is used in both stores
15378 when storing different constants, try to find a free register.
15379 For example, the code
15380 mov r0, 0
15381 str r0, [r2]
15382 mov r0, 1
15383 str r0, [r2, #4]
15384 can be transformed into
15385 mov r1, 0
15386 mov r0, 1
15387 strd r1, r0, [r2]
15388 in Thumb mode assuming that r1 is free.
15389 For ARM mode do the same but only if the starting register
15390 can be made to be even. */
15391 if (const_store
15392 && REGNO (operands[0]) == REGNO (operands[1])
15393 && INTVAL (operands[4]) != INTVAL (operands[5]))
15394 {
15395 if (TARGET_THUMB2)
15396 {
15397 CLEAR_HARD_REG_SET (regset);
15398 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15399 if (tmp == NULL_RTX)
15400 return false;
15401
15402 /* Use the new register in the first load to ensure that
15403 if the original input register is not dead after peephole,
15404 then it will have the correct constant value. */
15405 operands[0] = tmp;
15406 }
15407 else if (TARGET_ARM)
15408 {
15409 int regno = REGNO (operands[0]);
15410 if (!peep2_reg_dead_p (4, operands[0]))
15411 {
15412 /* When the input register is even and is not dead after the
15413 pattern, it has to hold the second constant but we cannot
15414 form a legal STRD in ARM mode with this register as the second
15415 register. */
15416 if (regno % 2 == 0)
15417 return false;
15418
15419 /* Is regno-1 free? */
15420 SET_HARD_REG_SET (regset);
15421 CLEAR_HARD_REG_BIT(regset, regno - 1);
15422 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15423 if (tmp == NULL_RTX)
15424 return false;
15425
15426 operands[0] = tmp;
15427 }
15428 else
15429 {
15430 /* Find a DImode register. */
15431 CLEAR_HARD_REG_SET (regset);
15432 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15433 if (tmp != NULL_RTX)
15434 {
15435 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15436 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15437 }
15438 else
15439 {
15440 /* Can we use the input register to form a DI register? */
15441 SET_HARD_REG_SET (regset);
15442 CLEAR_HARD_REG_BIT(regset,
15443 regno % 2 == 0 ? regno + 1 : regno - 1);
15444 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15445 if (tmp == NULL_RTX)
15446 return false;
15447 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15448 }
15449 }
15450
15451 gcc_assert (operands[0] != NULL_RTX);
15452 gcc_assert (operands[1] != NULL_RTX);
15453 gcc_assert (REGNO (operands[0]) % 2 == 0);
15454 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15455 }
15456 }
15457
15458 /* Make sure the instructions are ordered with lower memory access first. */
15459 if (offsets[0] > offsets[1])
15460 {
15461 gap = offsets[0] - offsets[1];
15462 offset = offsets[1];
15463
15464 /* Swap the instructions such that lower memory is accessed first. */
15465 std::swap (operands[0], operands[1]);
15466 std::swap (operands[2], operands[3]);
15467 if (const_store)
15468 std::swap (operands[4], operands[5]);
15469 }
15470 else
15471 {
15472 gap = offsets[1] - offsets[0];
15473 offset = offsets[0];
15474 }
15475
15476 /* Make sure accesses are to consecutive memory locations. */
15477 if (gap != 4)
15478 return false;
15479
15480 /* Make sure we generate legal instructions. */
15481 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15482 false, load))
15483 return true;
15484
15485 /* In Thumb state, where registers are almost unconstrained, there
15486 is little hope of fixing it. */
15487 if (TARGET_THUMB2)
15488 return false;
15489
15490 if (load && commute)
15491 {
15492 /* Try reordering registers. */
15493 std::swap (operands[0], operands[1]);
15494 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15495 false, load))
15496 return true;
15497 }
15498
15499 if (const_store)
15500 {
15501 /* If input registers are dead after this pattern, they can be
15502 reordered or replaced by other registers that are free in the
15503 current pattern. */
15504 if (!peep2_reg_dead_p (4, operands[0])
15505 || !peep2_reg_dead_p (4, operands[1]))
15506 return false;
15507
15508 /* Try to reorder the input registers. */
15509 /* For example, the code
15510 mov r0, 0
15511 mov r1, 1
15512 str r1, [r2]
15513 str r0, [r2, #4]
15514 can be transformed into
15515 mov r1, 0
15516 mov r0, 1
15517 strd r0, [r2]
15518 */
15519 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15520 false, false))
15521 {
15522 std::swap (operands[0], operands[1]);
15523 return true;
15524 }
15525
15526 /* Try to find a free DI register. */
15527 CLEAR_HARD_REG_SET (regset);
15528 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15529 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15530 while (true)
15531 {
15532 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15533 if (tmp == NULL_RTX)
15534 return false;
15535
15536 /* DREG must be an even-numbered register in DImode.
15537 Split it into SI registers. */
15538 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15539 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15540 gcc_assert (operands[0] != NULL_RTX);
15541 gcc_assert (operands[1] != NULL_RTX);
15542 gcc_assert (REGNO (operands[0]) % 2 == 0);
15543 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15544
15545 return (operands_ok_ldrd_strd (operands[0], operands[1],
15546 base, offset,
15547 false, load));
15548 }
15549 }
15550
15551 return false;
15552 }
15553
15554
15555
15556 \f
15557 /* Print a symbolic form of X to the debug file, F. */
15558 static void
15559 arm_print_value (FILE *f, rtx x)
15560 {
15561 switch (GET_CODE (x))
15562 {
15563 case CONST_INT:
15564 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15565 return;
15566
15567 case CONST_DOUBLE:
15568 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15569 return;
15570
15571 case CONST_VECTOR:
15572 {
15573 int i;
15574
15575 fprintf (f, "<");
15576 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15577 {
15578 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15579 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15580 fputc (',', f);
15581 }
15582 fprintf (f, ">");
15583 }
15584 return;
15585
15586 case CONST_STRING:
15587 fprintf (f, "\"%s\"", XSTR (x, 0));
15588 return;
15589
15590 case SYMBOL_REF:
15591 fprintf (f, "`%s'", XSTR (x, 0));
15592 return;
15593
15594 case LABEL_REF:
15595 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15596 return;
15597
15598 case CONST:
15599 arm_print_value (f, XEXP (x, 0));
15600 return;
15601
15602 case PLUS:
15603 arm_print_value (f, XEXP (x, 0));
15604 fprintf (f, "+");
15605 arm_print_value (f, XEXP (x, 1));
15606 return;
15607
15608 case PC:
15609 fprintf (f, "pc");
15610 return;
15611
15612 default:
15613 fprintf (f, "????");
15614 return;
15615 }
15616 }
15617 \f
15618 /* Routines for manipulation of the constant pool. */
15619
15620 /* Arm instructions cannot load a large constant directly into a
15621 register; they have to come from a pc relative load. The constant
15622 must therefore be placed in the addressable range of the pc
15623 relative load. Depending on the precise pc relative load
15624 instruction the range is somewhere between 256 bytes and 4k. This
15625 means that we often have to dump a constant inside a function, and
15626 generate code to branch around it.
15627
15628 It is important to minimize this, since the branches will slow
15629 things down and make the code larger.
15630
15631 Normally we can hide the table after an existing unconditional
15632 branch so that there is no interruption of the flow, but in the
15633 worst case the code looks like this:
15634
15635 ldr rn, L1
15636 ...
15637 b L2
15638 align
15639 L1: .long value
15640 L2:
15641 ...
15642
15643 ldr rn, L3
15644 ...
15645 b L4
15646 align
15647 L3: .long value
15648 L4:
15649 ...
15650
15651 We fix this by performing a scan after scheduling, which notices
15652 which instructions need to have their operands fetched from the
15653 constant table and builds the table.
15654
15655 The algorithm starts by building a table of all the constants that
15656 need fixing up and all the natural barriers in the function (places
15657 where a constant table can be dropped without breaking the flow).
15658 For each fixup we note how far the pc-relative replacement will be
15659 able to reach and the offset of the instruction into the function.
15660
15661 Having built the table we then group the fixes together to form
15662 tables that are as large as possible (subject to addressing
15663 constraints) and emit each table of constants after the last
15664 barrier that is within range of all the instructions in the group.
15665 If a group does not contain a barrier, then we forcibly create one
15666 by inserting a jump instruction into the flow. Once the table has
15667 been inserted, the insns are then modified to reference the
15668 relevant entry in the pool.
15669
15670 Possible enhancements to the algorithm (not implemented) are:
15671
15672 1) For some processors and object formats, there may be benefit in
15673 aligning the pools to the start of cache lines; this alignment
15674 would need to be taken into account when calculating addressability
15675 of a pool. */
15676
15677 /* These typedefs are located at the start of this file, so that
15678 they can be used in the prototypes there. This comment is to
15679 remind readers of that fact so that the following structures
15680 can be understood more easily.
15681
15682 typedef struct minipool_node Mnode;
15683 typedef struct minipool_fixup Mfix; */
15684
15685 struct minipool_node
15686 {
15687 /* Doubly linked chain of entries. */
15688 Mnode * next;
15689 Mnode * prev;
15690 /* The maximum offset into the code at which this entry can be placed. While
15691 pushing fixes for forward references, all entries are sorted in order
15692 of increasing max_address. */
15693 HOST_WIDE_INT max_address;
15694 /* Similarly for an entry inserted for a backwards ref. */
15695 HOST_WIDE_INT min_address;
15696 /* The number of fixes referencing this entry. This can become zero
15697 if we "unpush" an entry. In this case we ignore the entry when we
15698 come to emit the code. */
15699 int refcount;
15700 /* The offset from the start of the minipool. */
15701 HOST_WIDE_INT offset;
15702 /* The value in the table. */
15703 rtx value;
15704 /* The mode of value. */
15705 machine_mode mode;
15706 /* The size of the value. With iWMMXt enabled
15707 sizes > 4 also imply an alignment of 8 bytes. */
15708 int fix_size;
15709 };
15710
15711 struct minipool_fixup
15712 {
15713 Mfix * next;
15714 rtx_insn * insn;
15715 HOST_WIDE_INT address;
15716 rtx * loc;
15717 machine_mode mode;
15718 int fix_size;
15719 rtx value;
15720 Mnode * minipool;
15721 HOST_WIDE_INT forwards;
15722 HOST_WIDE_INT backwards;
15723 };
15724
15725 /* Fixes less than a word need padding out to a word boundary. */
15726 #define MINIPOOL_FIX_SIZE(mode) \
15727 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
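/* For example, an HImode fix still occupies 4 bytes in the pool, while a
   DImode fix occupies 8. */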
15728
15729 static Mnode * minipool_vector_head;
15730 static Mnode * minipool_vector_tail;
15731 static rtx_code_label *minipool_vector_label;
15732 static int minipool_pad;
15733
15734 /* The linked list of all minipool fixes required for this function. */
15735 Mfix * minipool_fix_head;
15736 Mfix * minipool_fix_tail;
15737 /* The fix entry for the current minipool, once it has been placed. */
15738 Mfix * minipool_barrier;
15739
15740 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15741 #define JUMP_TABLES_IN_TEXT_SECTION 0
15742 #endif
15743
15744 static HOST_WIDE_INT
15745 get_jump_table_size (rtx_jump_table_data *insn)
15746 {
15747 /* ADDR_VECs only take room if read-only data goes into the text
15748 section. */
15749 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15750 {
15751 rtx body = PATTERN (insn);
15752 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15753 HOST_WIDE_INT size;
15754 HOST_WIDE_INT modesize;
15755
15756 modesize = GET_MODE_SIZE (GET_MODE (body));
15757 size = modesize * XVECLEN (body, elt);
15758 switch (modesize)
15759 {
15760 case 1:
15761 /* Round up size of TBB table to a halfword boundary. */
15762 size = (size + 1) & ~HOST_WIDE_INT_1;
15763 break;
15764 case 2:
15765 /* No padding necessary for TBH. */
15766 break;
15767 case 4:
15768 /* Add two bytes for alignment on Thumb. */
15769 if (TARGET_THUMB)
15770 size += 2;
15771 break;
15772 default:
15773 gcc_unreachable ();
15774 }
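/* For example: a 9-entry TBB table (QImode) is 9 bytes, rounded up to 10;
   a 9-entry TBH table (HImode) is 18 bytes; a 9-entry word table is 36
   bytes, plus 2 on Thumb for alignment. */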
15775 return size;
15776 }
15777
15778 return 0;
15779 }
15780
15781 /* Return the maximum amount of padding that will be inserted before
15782 label LABEL. */
15783
15784 static HOST_WIDE_INT
15785 get_label_padding (rtx label)
15786 {
15787 HOST_WIDE_INT align, min_insn_size;
15788
15789 align = 1 << label_to_alignment (label);
15790 min_insn_size = TARGET_THUMB ? 2 : 4;
15791 return align > min_insn_size ? align - min_insn_size : 0;
15792 }
15793
15794 /* Move a minipool fix MP from its current location to before MAX_MP.
15795 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15796 constraints may need updating. */
15797 static Mnode *
15798 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15799 HOST_WIDE_INT max_address)
15800 {
15801 /* The code below assumes these are different. */
15802 gcc_assert (mp != max_mp);
15803
15804 if (max_mp == NULL)
15805 {
15806 if (max_address < mp->max_address)
15807 mp->max_address = max_address;
15808 }
15809 else
15810 {
15811 if (max_address > max_mp->max_address - mp->fix_size)
15812 mp->max_address = max_mp->max_address - mp->fix_size;
15813 else
15814 mp->max_address = max_address;
15815
15816 /* Unlink MP from its current position. Since max_mp is non-null,
15817 mp->prev must be non-null. */
15818 mp->prev->next = mp->next;
15819 if (mp->next != NULL)
15820 mp->next->prev = mp->prev;
15821 else
15822 minipool_vector_tail = mp->prev;
15823
15824 /* Re-insert it before MAX_MP. */
15825 mp->next = max_mp;
15826 mp->prev = max_mp->prev;
15827 max_mp->prev = mp;
15828
15829 if (mp->prev != NULL)
15830 mp->prev->next = mp;
15831 else
15832 minipool_vector_head = mp;
15833 }
15834
15835 /* Save the new entry. */
15836 max_mp = mp;
15837
15838 /* Scan over the preceding entries and adjust their addresses as
15839 required. */
15840 while (mp->prev != NULL
15841 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15842 {
15843 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15844 mp = mp->prev;
15845 }
15846
15847 return max_mp;
15848 }
15849
15850 /* Add a constant to the minipool for a forward reference. Returns the
15851 node added or NULL if the constant will not fit in this pool. */
15852 static Mnode *
15853 add_minipool_forward_ref (Mfix *fix)
15854 {
15855 /* If set, max_mp is the first pool_entry that has a lower
15856 constraint than the one we are trying to add. */
15857 Mnode * max_mp = NULL;
15858 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15859 Mnode * mp;
15860
15861 /* If the minipool starts before the end of FIX->INSN then this FIX
15862 can not be placed into the current pool. Furthermore, adding the
15863 new constant pool entry may cause the pool to start FIX_SIZE bytes
15864 earlier. */
15865 if (minipool_vector_head &&
15866 (fix->address + get_attr_length (fix->insn)
15867 >= minipool_vector_head->max_address - fix->fix_size))
15868 return NULL;
15869
15870 /* Scan the pool to see if a constant with the same value has
15871 already been added. While we are doing this, also note the
15872 location where we must insert the constant if it doesn't already
15873 exist. */
15874 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15875 {
15876 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15877 && fix->mode == mp->mode
15878 && (!LABEL_P (fix->value)
15879 || (CODE_LABEL_NUMBER (fix->value)
15880 == CODE_LABEL_NUMBER (mp->value)))
15881 && rtx_equal_p (fix->value, mp->value))
15882 {
15883 /* More than one fix references this entry. */
15884 mp->refcount++;
15885 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15886 }
15887
15888 /* Note the insertion point if necessary. */
15889 if (max_mp == NULL
15890 && mp->max_address > max_address)
15891 max_mp = mp;
15892
15893 /* If we are inserting an 8-byte aligned quantity and
15894 we have not already found an insertion point, then
15895 make sure that all such 8-byte aligned quantities are
15896 placed at the start of the pool. */
15897 if (ARM_DOUBLEWORD_ALIGN
15898 && max_mp == NULL
15899 && fix->fix_size >= 8
15900 && mp->fix_size < 8)
15901 {
15902 max_mp = mp;
15903 max_address = mp->max_address;
15904 }
15905 }
15906
15907 /* The value is not currently in the minipool, so we need to create
15908 a new entry for it. If MAX_MP is NULL, the entry will be put on
15909 the end of the list since the placement is less constrained than
15910 any existing entry. Otherwise, we insert the new fix before
15911 MAX_MP and, if necessary, adjust the constraints on the other
15912 entries. */
15913 mp = XNEW (Mnode);
15914 mp->fix_size = fix->fix_size;
15915 mp->mode = fix->mode;
15916 mp->value = fix->value;
15917 mp->refcount = 1;
15918 /* Not yet required for a backwards ref. */
15919 mp->min_address = -65536;
15920
15921 if (max_mp == NULL)
15922 {
15923 mp->max_address = max_address;
15924 mp->next = NULL;
15925 mp->prev = minipool_vector_tail;
15926
15927 if (mp->prev == NULL)
15928 {
15929 minipool_vector_head = mp;
15930 minipool_vector_label = gen_label_rtx ();
15931 }
15932 else
15933 mp->prev->next = mp;
15934
15935 minipool_vector_tail = mp;
15936 }
15937 else
15938 {
15939 if (max_address > max_mp->max_address - mp->fix_size)
15940 mp->max_address = max_mp->max_address - mp->fix_size;
15941 else
15942 mp->max_address = max_address;
15943
15944 mp->next = max_mp;
15945 mp->prev = max_mp->prev;
15946 max_mp->prev = mp;
15947 if (mp->prev != NULL)
15948 mp->prev->next = mp;
15949 else
15950 minipool_vector_head = mp;
15951 }
15952
15953 /* Save the new entry. */
15954 max_mp = mp;
15955
15956 /* Scan over the preceding entries and adjust their addresses as
15957 required. */
15958 while (mp->prev != NULL
15959 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15960 {
15961 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15962 mp = mp->prev;
15963 }
15964
15965 return max_mp;
15966 }
15967
15968 static Mnode *
15969 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15970 HOST_WIDE_INT min_address)
15971 {
15972 HOST_WIDE_INT offset;
15973
15974 /* The code below assumes these are different. */
15975 gcc_assert (mp != min_mp);
15976
15977 if (min_mp == NULL)
15978 {
15979 if (min_address > mp->min_address)
15980 mp->min_address = min_address;
15981 }
15982 else
15983 {
15984 /* We will adjust this below if it is too loose. */
15985 mp->min_address = min_address;
15986
15987 /* Unlink MP from its current position. Since min_mp is non-null,
15988 mp->next must be non-null. */
15989 mp->next->prev = mp->prev;
15990 if (mp->prev != NULL)
15991 mp->prev->next = mp->next;
15992 else
15993 minipool_vector_head = mp->next;
15994
15995 /* Reinsert it after MIN_MP. */
15996 mp->prev = min_mp;
15997 mp->next = min_mp->next;
15998 min_mp->next = mp;
15999 if (mp->next != NULL)
16000 mp->next->prev = mp;
16001 else
16002 minipool_vector_tail = mp;
16003 }
16004
16005 min_mp = mp;
16006
16007 offset = 0;
16008 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16009 {
16010 mp->offset = offset;
16011 if (mp->refcount > 0)
16012 offset += mp->fix_size;
16013
16014 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16015 mp->next->min_address = mp->min_address + mp->fix_size;
16016 }
16017
16018 return min_mp;
16019 }
16020
16021 /* Add a constant to the minipool for a backward reference. Returns the
16022 node added or NULL if the constant will not fit in this pool.
16023
16024 Note that the code for insertion for a backwards reference can be
16025 somewhat confusing because the calculated offsets for each fix do
16026 not take into account the size of the pool (which is still under
16027 construction). */
16028 static Mnode *
16029 add_minipool_backward_ref (Mfix *fix)
16030 {
16031 /* If set, min_mp is the last pool_entry that has a lower constraint
16032 than the one we are trying to add. */
16033 Mnode *min_mp = NULL;
16034 /* This can be negative, since it is only a constraint. */
16035 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16036 Mnode *mp;
16037
16038 /* If we can't reach the current pool from this insn, or if we can't
16039 insert this entry at the end of the pool without pushing other
16040 fixes out of range, then we don't try. This ensures that we
16041 can't fail later on. */
16042 if (min_address >= minipool_barrier->address
16043 || (minipool_vector_tail->min_address + fix->fix_size
16044 >= minipool_barrier->address))
16045 return NULL;
16046
16047 /* Scan the pool to see if a constant with the same value has
16048 already been added. While we are doing this, also note the
16049 location where we must insert the constant if it doesn't already
16050 exist. */
16051 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16052 {
16053 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16054 && fix->mode == mp->mode
16055 && (!LABEL_P (fix->value)
16056 || (CODE_LABEL_NUMBER (fix->value)
16057 == CODE_LABEL_NUMBER (mp->value)))
16058 && rtx_equal_p (fix->value, mp->value)
16059 /* Check that there is enough slack to move this entry to the
16060 end of the table (this is conservative). */
16061 && (mp->max_address
16062 > (minipool_barrier->address
16063 + minipool_vector_tail->offset
16064 + minipool_vector_tail->fix_size)))
16065 {
16066 mp->refcount++;
16067 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16068 }
16069
16070 if (min_mp != NULL)
16071 mp->min_address += fix->fix_size;
16072 else
16073 {
16074 /* Note the insertion point if necessary. */
16075 if (mp->min_address < min_address)
16076 {
16077 /* For now, we do not allow the insertion of 8-byte alignment
16078 requiring nodes anywhere but at the start of the pool. */
16079 if (ARM_DOUBLEWORD_ALIGN
16080 && fix->fix_size >= 8 && mp->fix_size < 8)
16081 return NULL;
16082 else
16083 min_mp = mp;
16084 }
16085 else if (mp->max_address
16086 < minipool_barrier->address + mp->offset + fix->fix_size)
16087 {
16088 /* Inserting before this entry would push the fix beyond
16089 its maximum address (which can happen if we have
16090 re-located a forwards fix); force the new fix to come
16091 after it. */
16092 if (ARM_DOUBLEWORD_ALIGN
16093 && fix->fix_size >= 8 && mp->fix_size < 8)
16094 return NULL;
16095 else
16096 {
16097 min_mp = mp;
16098 min_address = mp->min_address + fix->fix_size;
16099 }
16100 }
16101 /* Do not insert a non-8-byte aligned quantity before 8-byte
16102 aligned quantities. */
16103 else if (ARM_DOUBLEWORD_ALIGN
16104 && fix->fix_size < 8
16105 && mp->fix_size >= 8)
16106 {
16107 min_mp = mp;
16108 min_address = mp->min_address + fix->fix_size;
16109 }
16110 }
16111 }
16112
16113 /* We need to create a new entry. */
16114 mp = XNEW (Mnode);
16115 mp->fix_size = fix->fix_size;
16116 mp->mode = fix->mode;
16117 mp->value = fix->value;
16118 mp->refcount = 1;
16119 mp->max_address = minipool_barrier->address + 65536;
16120
16121 mp->min_address = min_address;
16122
16123 if (min_mp == NULL)
16124 {
16125 mp->prev = NULL;
16126 mp->next = minipool_vector_head;
16127
16128 if (mp->next == NULL)
16129 {
16130 minipool_vector_tail = mp;
16131 minipool_vector_label = gen_label_rtx ();
16132 }
16133 else
16134 mp->next->prev = mp;
16135
16136 minipool_vector_head = mp;
16137 }
16138 else
16139 {
16140 mp->next = min_mp->next;
16141 mp->prev = min_mp;
16142 min_mp->next = mp;
16143
16144 if (mp->next != NULL)
16145 mp->next->prev = mp;
16146 else
16147 minipool_vector_tail = mp;
16148 }
16149
16150 /* Save the new entry. */
16151 min_mp = mp;
16152
16153 if (mp->prev)
16154 mp = mp->prev;
16155 else
16156 mp->offset = 0;
16157
16158 /* Scan over the following entries and adjust their offsets. */
16159 while (mp->next != NULL)
16160 {
16161 if (mp->next->min_address < mp->min_address + mp->fix_size)
16162 mp->next->min_address = mp->min_address + mp->fix_size;
16163
16164 if (mp->refcount)
16165 mp->next->offset = mp->offset + mp->fix_size;
16166 else
16167 mp->next->offset = mp->offset;
16168
16169 mp = mp->next;
16170 }
16171
16172 return min_mp;
16173 }
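
/* A compiled-out, stand-alone restatement of the early-out test above, with
   illustrative parameter names: an entry can only join this pool if the
   referencing insn can still reach it (its minimum address lies below the
   barrier) and appending it at the end of the pool would not push the
   current tail past the barrier.  */
#if 0
static int
backward_entry_can_join_pool_p (long fix_address, long backward_range,
				long fix_size, long barrier_address,
				long tail_min_address)
{
  long min_address = fix_address - backward_range;

  if (min_address >= barrier_address
      || tail_min_address + fix_size >= barrier_address)
    return 0;		/* The constant cannot go in this pool.  */
  return 1;
}
#endif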
16174
16175 static void
16176 assign_minipool_offsets (Mfix *barrier)
16177 {
16178 HOST_WIDE_INT offset = 0;
16179 Mnode *mp;
16180
16181 minipool_barrier = barrier;
16182
16183 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16184 {
16185 mp->offset = offset;
16186
16187 if (mp->refcount > 0)
16188 offset += mp->fix_size;
16189 }
16190 }
16191
16192 /* Output the literal table. */
16193 static void
16194 dump_minipool (rtx_insn *scan)
16195 {
16196 Mnode * mp;
16197 Mnode * nmp;
16198 int align64 = 0;
16199
16200 if (ARM_DOUBLEWORD_ALIGN)
16201 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16202 if (mp->refcount > 0 && mp->fix_size >= 8)
16203 {
16204 align64 = 1;
16205 break;
16206 }
16207
16208 if (dump_file)
16209 fprintf (dump_file,
16210 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16211 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16212
16213 scan = emit_label_after (gen_label_rtx (), scan);
16214 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16215 scan = emit_label_after (minipool_vector_label, scan);
16216
16217 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16218 {
16219 if (mp->refcount > 0)
16220 {
16221 if (dump_file)
16222 {
16223 fprintf (dump_file,
16224 ";; Offset %u, min %ld, max %ld ",
16225 (unsigned) mp->offset, (unsigned long) mp->min_address,
16226 (unsigned long) mp->max_address);
16227 arm_print_value (dump_file, mp->value);
16228 fputc ('\n', dump_file);
16229 }
16230
16231 rtx val = copy_rtx (mp->value);
16232
16233 switch (GET_MODE_SIZE (mp->mode))
16234 {
16235 #ifdef HAVE_consttable_1
16236 case 1:
16237 scan = emit_insn_after (gen_consttable_1 (val), scan);
16238 break;
16239
16240 #endif
16241 #ifdef HAVE_consttable_2
16242 case 2:
16243 scan = emit_insn_after (gen_consttable_2 (val), scan);
16244 break;
16245
16246 #endif
16247 #ifdef HAVE_consttable_4
16248 case 4:
16249 scan = emit_insn_after (gen_consttable_4 (val), scan);
16250 break;
16251
16252 #endif
16253 #ifdef HAVE_consttable_8
16254 case 8:
16255 scan = emit_insn_after (gen_consttable_8 (val), scan);
16256 break;
16257
16258 #endif
16259 #ifdef HAVE_consttable_16
16260 case 16:
16261 scan = emit_insn_after (gen_consttable_16 (val), scan);
16262 break;
16263
16264 #endif
16265 default:
16266 gcc_unreachable ();
16267 }
16268 }
16269
16270 nmp = mp->next;
16271 free (mp);
16272 }
16273
16274 minipool_vector_head = minipool_vector_tail = NULL;
16275 scan = emit_insn_after (gen_consttable_end (), scan);
16276 scan = emit_barrier_after (scan);
16277 }
16278
16279 /* Return the cost of forcibly inserting a barrier after INSN. */
16280 static int
16281 arm_barrier_cost (rtx_insn *insn)
16282 {
16283 /* Basing the location of the pool on the loop depth is preferable,
16284 but at the moment, the basic block information seems to be
16285 corrupt by this stage of the compilation. */
16286 int base_cost = 50;
16287 rtx_insn *next = next_nonnote_insn (insn);
16288
16289 if (next != NULL && LABEL_P (next))
16290 base_cost -= 20;
16291
16292 switch (GET_CODE (insn))
16293 {
16294 case CODE_LABEL:
16295 /* It will always be better to place the table before the label, rather
16296 than after it. */
16297 return 50;
16298
16299 case INSN:
16300 case CALL_INSN:
16301 return base_cost;
16302
16303 case JUMP_INSN:
16304 return base_cost - 10;
16305
16306 default:
16307 return base_cost + 10;
16308 }
16309 }
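
/* For example, with the weights above a JUMP_INSN that is immediately
   followed by a CODE_LABEL costs 50 - 20 - 10 = 20, making the gap after an
   unconditional branch and before a label a preferred place to park the
   pool, while splitting the stream at a CODE_LABEL itself always costs 50;
   lower costs are preferred by create_fix_barrier below.  */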
16310
16311 /* Find the best place in the insn stream in the range
16312 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16313 Create the barrier by inserting a jump and add a new fix entry for
16314 it. */
16315 static Mfix *
16316 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16317 {
16318 HOST_WIDE_INT count = 0;
16319 rtx_barrier *barrier;
16320 rtx_insn *from = fix->insn;
16321 /* The instruction after which we will insert the jump. */
16322 rtx_insn *selected = NULL;
16323 int selected_cost;
16324 /* The address at which the jump instruction will be placed. */
16325 HOST_WIDE_INT selected_address;
16326 Mfix * new_fix;
16327 HOST_WIDE_INT max_count = max_address - fix->address;
16328 rtx_code_label *label = gen_label_rtx ();
16329
16330 selected_cost = arm_barrier_cost (from);
16331 selected_address = fix->address;
16332
16333 while (from && count < max_count)
16334 {
16335 rtx_jump_table_data *tmp;
16336 int new_cost;
16337
16338 /* This code shouldn't have been called if there was a natural barrier
16339 within range. */
16340 gcc_assert (!BARRIER_P (from));
16341
16342 /* Count the length of this insn. This must stay in sync with the
16343 code that pushes minipool fixes. */
16344 if (LABEL_P (from))
16345 count += get_label_padding (from);
16346 else
16347 count += get_attr_length (from);
16348
16349 /* If there is a jump table, add its length. */
16350 if (tablejump_p (from, NULL, &tmp))
16351 {
16352 count += get_jump_table_size (tmp);
16353
16354 /* Jump tables aren't in a basic block, so base the cost on
16355 the dispatch insn. If we select this location, we will
16356 still put the pool after the table. */
16357 new_cost = arm_barrier_cost (from);
16358
16359 if (count < max_count
16360 && (!selected || new_cost <= selected_cost))
16361 {
16362 selected = tmp;
16363 selected_cost = new_cost;
16364 selected_address = fix->address + count;
16365 }
16366
16367 /* Continue after the dispatch table. */
16368 from = NEXT_INSN (tmp);
16369 continue;
16370 }
16371
16372 new_cost = arm_barrier_cost (from);
16373
16374 if (count < max_count
16375 && (!selected || new_cost <= selected_cost))
16376 {
16377 selected = from;
16378 selected_cost = new_cost;
16379 selected_address = fix->address + count;
16380 }
16381
16382 from = NEXT_INSN (from);
16383 }
16384
16385 /* Make sure that we found a place to insert the jump. */
16386 gcc_assert (selected);
16387
16388 /* Make sure we do not split a call and its corresponding
16389 CALL_ARG_LOCATION note. */
16390 if (CALL_P (selected))
16391 {
16392 rtx_insn *next = NEXT_INSN (selected);
16393 if (next && NOTE_P (next)
16394 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16395 selected = next;
16396 }
16397
16398 /* Create a new JUMP_INSN that branches around a barrier. */
16399 from = emit_jump_insn_after (gen_jump (label), selected);
16400 JUMP_LABEL (from) = label;
16401 barrier = emit_barrier_after (from);
16402 emit_label_after (label, barrier);
16403
16404 /* Create a minipool barrier entry for the new barrier. */
16405 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16406 new_fix->insn = barrier;
16407 new_fix->address = selected_address;
16408 new_fix->next = fix->next;
16409 fix->next = new_fix;
16410
16411 return new_fix;
16412 }
16413
16414 /* Record that there is a natural barrier in the insn stream at
16415 ADDRESS. */
16416 static void
16417 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16418 {
16419 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16420
16421 fix->insn = insn;
16422 fix->address = address;
16423
16424 fix->next = NULL;
16425 if (minipool_fix_head != NULL)
16426 minipool_fix_tail->next = fix;
16427 else
16428 minipool_fix_head = fix;
16429
16430 minipool_fix_tail = fix;
16431 }
16432
16433 /* Record INSN, which will need fixing up to load a value from the
16434 minipool. ADDRESS is the offset of the insn from the start of the
16435 function; LOC is a pointer to the part of the insn which requires
16436 fixing; VALUE is the constant that must be loaded, which is of type
16437 MODE. */
16438 static void
16439 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16440 machine_mode mode, rtx value)
16441 {
16442 gcc_assert (!arm_disable_literal_pool);
16443 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16444
16445 fix->insn = insn;
16446 fix->address = address;
16447 fix->loc = loc;
16448 fix->mode = mode;
16449 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16450 fix->value = value;
16451 fix->forwards = get_attr_pool_range (insn);
16452 fix->backwards = get_attr_neg_pool_range (insn);
16453 fix->minipool = NULL;
16454
16455 /* If an insn doesn't have a range defined for it, then it isn't
16456 expecting to be reworked by this code. Better to stop now than
16457 to generate duff assembly code. */
16458 gcc_assert (fix->forwards || fix->backwards);
16459
16460 /* If an entry requires 8-byte alignment then assume all constant pools
16461 require 4 bytes of padding. Trying to do this later on a per-pool
16462 basis is awkward because existing pool entries have to be modified. */
16463 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16464 minipool_pad = 4;
16465
16466 if (dump_file)
16467 {
16468 fprintf (dump_file,
16469 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16470 GET_MODE_NAME (mode),
16471 INSN_UID (insn), (unsigned long) address,
16472 -1 * (long)fix->backwards, (long)fix->forwards);
16473 arm_print_value (dump_file, fix->value);
16474 fprintf (dump_file, "\n");
16475 }
16476
16477 /* Add it to the chain of fixes. */
16478 fix->next = NULL;
16479
16480 if (minipool_fix_head != NULL)
16481 minipool_fix_tail->next = fix;
16482 else
16483 minipool_fix_head = fix;
16484
16485 minipool_fix_tail = fix;
16486 }
16487
16488 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16489 constant inline; synthesis sequences costing more than this are not
16490 considered worthwhile. */
16491 int
16492 arm_max_const_double_inline_cost ()
16493 {
16494 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16495 }
16496
16497 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16498 Returns the number of insns needed, or 99 if we don't know how to
16499 do it. */
16500 int
16501 arm_const_double_inline_cost (rtx val)
16502 {
16503 rtx lowpart, highpart;
16504 machine_mode mode;
16505
16506 mode = GET_MODE (val);
16507
16508 if (mode == VOIDmode)
16509 mode = DImode;
16510
16511 gcc_assert (GET_MODE_SIZE (mode) == 8);
16512
16513 lowpart = gen_lowpart (SImode, val);
16514 highpart = gen_highpart_mode (SImode, mode, val);
16515
16516 gcc_assert (CONST_INT_P (lowpart));
16517 gcc_assert (CONST_INT_P (highpart));
16518
16519 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16520 NULL_RTX, NULL_RTX, 0, 0)
16521 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16522 NULL_RTX, NULL_RTX, 0, 0));
16523 }
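
/* Compiled-out sketch of the structure of the cost computation above: split
   the 64-bit value into its two 32-bit halves and sum the per-half synthesis
   costs.  count_synthesis_insns stands in for arm_gen_constant's counting
   mode and is only a declaration here.  */
#if 0
#include <stdint.h>

static int count_synthesis_insns (uint32_t value);	/* stand-in */

static int
const_double_inline_cost_sketch (uint64_t value)
{
  uint32_t lowpart = (uint32_t) value;
  uint32_t highpart = (uint32_t) (value >> 32);

  return count_synthesis_insns (lowpart) + count_synthesis_insns (highpart);
}
#endif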
16524
16525 /* Cost of loading a SImode constant. */
16526 static inline int
16527 arm_const_inline_cost (enum rtx_code code, rtx val)
16528 {
16529 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16530 NULL_RTX, NULL_RTX, 1, 0);
16531 }
16532
16533 /* Return true if it is worthwhile to split a 64-bit constant into two
16534 32-bit operations. This is the case if optimizing for size, or
16535 if we have load delay slots, or if one 32-bit part can be done with
16536 a single data operation. */
16537 bool
16538 arm_const_double_by_parts (rtx val)
16539 {
16540 machine_mode mode = GET_MODE (val);
16541 rtx part;
16542
16543 if (optimize_size || arm_ld_sched)
16544 return true;
16545
16546 if (mode == VOIDmode)
16547 mode = DImode;
16548
16549 part = gen_highpart_mode (SImode, mode, val);
16550
16551 gcc_assert (CONST_INT_P (part));
16552
16553 if (const_ok_for_arm (INTVAL (part))
16554 || const_ok_for_arm (~INTVAL (part)))
16555 return true;
16556
16557 part = gen_lowpart (SImode, val);
16558
16559 gcc_assert (CONST_INT_P (part));
16560
16561 if (const_ok_for_arm (INTVAL (part))
16562 || const_ok_for_arm (~INTVAL (part)))
16563 return true;
16564
16565 return false;
16566 }
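
/* The const_ok_for_arm tests above ask whether one 32-bit half can be an
   immediate operand of a single data-processing instruction.  Below is a
   compiled-out, simplified model of just the classic A32 rule (an 8-bit
   value rotated right by an even amount); the real predicate also covers
   other cases, so treat this only as an illustration.  */
#if 0
#include <stdint.h>

static int
classic_a32_immediate_p (uint32_t x)
{
  for (unsigned int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a rotate-right-by-ROT encoding.  */
      uint32_t undone = rot ? (x << rot) | (x >> (32 - rot)) : x;
      if (undone <= 0xff)
	return 1;
    }
  return 0;
}
#endif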
16567
16568 /* Return true if it is possible to inline both the high and low parts
16569 of a 64-bit constant into 32-bit data processing instructions. */
16570 bool
16571 arm_const_double_by_immediates (rtx val)
16572 {
16573 machine_mode mode = GET_MODE (val);
16574 rtx part;
16575
16576 if (mode == VOIDmode)
16577 mode = DImode;
16578
16579 part = gen_highpart_mode (SImode, mode, val);
16580
16581 gcc_assert (CONST_INT_P (part));
16582
16583 if (!const_ok_for_arm (INTVAL (part)))
16584 return false;
16585
16586 part = gen_lowpart (SImode, val);
16587
16588 gcc_assert (CONST_INT_P (part));
16589
16590 if (!const_ok_for_arm (INTVAL (part)))
16591 return false;
16592
16593 return true;
16594 }
16595
16596 /* Scan INSN and note any of its operands that need fixing.
16597 If DO_PUSHES is false we do not actually push any of the fixups
16598 needed. */
16599 static void
16600 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16601 {
16602 int opno;
16603
16604 extract_constrain_insn (insn);
16605
16606 if (recog_data.n_alternatives == 0)
16607 return;
16608
16609 /* Fill in recog_op_alt with information about the constraints of
16610 this insn. */
16611 preprocess_constraints (insn);
16612
16613 const operand_alternative *op_alt = which_op_alt ();
16614 for (opno = 0; opno < recog_data.n_operands; opno++)
16615 {
16616 /* Things we need to fix can only occur in inputs. */
16617 if (recog_data.operand_type[opno] != OP_IN)
16618 continue;
16619
16620 /* If this alternative is a memory reference, then any mention
16621 of constants in this alternative is really to fool reload
16622 into allowing us to accept one there. We need to fix them up
16623 now so that we output the right code. */
16624 if (op_alt[opno].memory_ok)
16625 {
16626 rtx op = recog_data.operand[opno];
16627
16628 if (CONSTANT_P (op))
16629 {
16630 if (do_pushes)
16631 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16632 recog_data.operand_mode[opno], op);
16633 }
16634 else if (MEM_P (op)
16635 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16636 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16637 {
16638 if (do_pushes)
16639 {
16640 rtx cop = avoid_constant_pool_reference (op);
16641
16642 /* Casting the address of something to a mode narrower
16643 than a word can cause avoid_constant_pool_reference()
16644 to return the pool reference itself. That's no good to
16645 us here. Let's just hope that we can use the
16646 constant pool value directly. */
16647 if (op == cop)
16648 cop = get_pool_constant (XEXP (op, 0));
16649
16650 push_minipool_fix (insn, address,
16651 recog_data.operand_loc[opno],
16652 recog_data.operand_mode[opno], cop);
16653 }
16654
16655 }
16656 }
16657 }
16658
16659 return;
16660 }
16661
16662 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16663 and unions in the context of ARMv8-M Security Extensions. It is used as a
16664 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16665 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16666 or four masks, depending on whether it is being computed for a
16667 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16668 respectively. The tree for the type of the argument or a field within an
16669 argument is passed in ARG_TYPE, the current register this argument or field
16670 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16671 argument or field starts at is passed in STARTING_BIT and the last used bit
16672 is kept in LAST_USED_BIT which is also updated accordingly. */
16673
16674 static unsigned HOST_WIDE_INT
16675 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16676 uint32_t * padding_bits_to_clear,
16677 unsigned starting_bit, int * last_used_bit)
16678
16679 {
16680 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16681
16682 if (TREE_CODE (arg_type) == RECORD_TYPE)
16683 {
16684 unsigned current_bit = starting_bit;
16685 tree field;
16686 long int offset, size;
16687
16688
16689 field = TYPE_FIELDS (arg_type);
16690 while (field)
16691 {
16692 /* The offset within a structure is always an offset from
16693 the start of that structure. Make sure we take that into account
16694 in the calculation of the register-based offset that we use here. */
16695 offset = starting_bit;
16696 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16697 offset %= 32;
16698
16699 /* This is the actual size of the field, for bitfields this is the
16700 bitfield width and not the container size. */
16701 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16702
16703 if (*last_used_bit != offset)
16704 {
16705 if (offset < *last_used_bit)
16706 {
16707 /* This field's offset is before the 'last_used_bit', that
16708 means this field goes on the next register. So we need to
16709 pad the rest of the current register and increase the
16710 register number. */
16711 uint32_t mask;
16712 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16713 mask++;
16714
16715 padding_bits_to_clear[*regno] |= mask;
16716 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16717 (*regno)++;
16718 }
16719 else
16720 {
16721 /* Otherwise we pad the bits between the last field's end and
16722 the start of the new field. */
16723 uint32_t mask;
16724
16725 mask = ((uint32_t)-1) >> (32 - offset);
16726 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16727 padding_bits_to_clear[*regno] |= mask;
16728 }
16729 current_bit = offset;
16730 }
16731
16732 /* Calculate further padding bits for inner structs/unions too. */
16733 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16734 {
16735 *last_used_bit = current_bit;
16736 not_to_clear_reg_mask
16737 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16738 padding_bits_to_clear, offset,
16739 last_used_bit);
16740 }
16741 else
16742 {
16743 /* Update 'current_bit' with this field's size. If the
16744 'current_bit' lies in a subsequent register, update 'regno' and
16745 reset 'current_bit' to point to the current bit in that new
16746 register. */
16747 current_bit += size;
16748 while (current_bit >= 32)
16749 {
16750 current_bit -= 32;
16751 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16752 (*regno)++;
16753 }
16754 *last_used_bit = current_bit;
16755 }
16756
16757 field = TREE_CHAIN (field);
16758 }
16759 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16760 }
16761 else if (TREE_CODE (arg_type) == UNION_TYPE)
16762 {
16763 tree field, field_t;
16764 int i, regno_t, field_size;
16765 int max_reg = -1;
16766 int max_bit = -1;
16767 uint32_t mask;
16768 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16769 = {-1, -1, -1, -1};
16770
16771 /* To compute the padding bits in a union we only consider bits as
16772 padding bits if they are always either a padding bit or fall outside a
16773 field's size for all fields in the union. */
16774 field = TYPE_FIELDS (arg_type);
16775 while (field)
16776 {
16777 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16778 = {0U, 0U, 0U, 0U};
16779 int last_used_bit_t = *last_used_bit;
16780 regno_t = *regno;
16781 field_t = TREE_TYPE (field);
16782
16783 /* If the field's type is either a record or a union make sure to
16784 compute their padding bits too. */
16785 if (RECORD_OR_UNION_TYPE_P (field_t))
16786 not_to_clear_reg_mask
16787 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16788 &padding_bits_to_clear_t[0],
16789 starting_bit, &last_used_bit_t);
16790 else
16791 {
16792 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16793 regno_t = (field_size / 32) + *regno;
16794 last_used_bit_t = (starting_bit + field_size) % 32;
16795 }
16796
16797 for (i = *regno; i < regno_t; i++)
16798 {
16799 /* For all but the last register used by this field only keep the
16800 padding bits that were padding bits in this field. */
16801 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16802 }
16803
16804 /* For the last register, keep all padding bits that were padding
16805 bits in this field and any padding bits that are still valid
16806 as padding bits but fall outside of this field's size. */
16807 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16808 padding_bits_to_clear_res[regno_t]
16809 &= padding_bits_to_clear_t[regno_t] | mask;
16810
16811 /* Update the maximum size of the fields in terms of registers used
16812 ('max_reg') and the 'last_used_bit' in said register. */
16813 if (max_reg < regno_t)
16814 {
16815 max_reg = regno_t;
16816 max_bit = last_used_bit_t;
16817 }
16818 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16819 max_bit = last_used_bit_t;
16820
16821 field = TREE_CHAIN (field);
16822 }
16823
16824 /* Update the current padding_bits_to_clear using the intersection of the
16825 padding bits of all the fields. */
16826 for (i = *regno; i < max_reg; i++)
16827 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16828
16829 /* Do not keep trailing padding bits, we do not know yet whether this
16830 is the end of the argument. */
16831 mask = ((uint32_t) 1 << max_bit) - 1;
16832 padding_bits_to_clear[max_reg]
16833 |= padding_bits_to_clear_res[max_reg] & mask;
16834
16835 *regno = max_reg;
16836 *last_used_bit = max_bit;
16837 }
16838 else
16839 /* This function should only be used for structs and unions. */
16840 gcc_unreachable ();
16841
16842 return not_to_clear_reg_mask;
16843 }
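
/* The mask arithmetic above recurs throughout this function: "all bits from
   bit B upwards" is formed as (uint32_t) -1 - (1 << B) + 1 and "all bits
   below bit B" as (1 << B) - 1.  The compiled-out fragment below merely
   spells out those identities for 0 <= B < 32.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
padding_mask_identities (unsigned int b)	/* Requires 0 <= b < 32.  */
{
  uint32_t upper = ((uint32_t) -1) - ((uint32_t) 1 << b) + 1;
  uint32_t lower = ((uint32_t) 1 << b) - 1;

  /* UPPER selects bits b..31, LOWER selects bits 0..b-1.  */
  assert (upper == ~lower);
  assert ((upper | lower) == (uint32_t) -1 && (upper & lower) == 0);
}
#endif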
16844
16845 /* In the context of ARMv8-M Security Extensions, this function is used for both
16846 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16847 registers are used when returning or passing arguments, which is then
16848 returned as a mask. It will also compute a mask to indicate padding/unused
16849 bits for each of these registers, and passes this through the
16850 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16851 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16852 the starting register used to pass this argument or return value is passed
16853 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16854 for struct and union types. */
16855
16856 static unsigned HOST_WIDE_INT
16857 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16858 uint32_t * padding_bits_to_clear)
16859
16860 {
16861 int last_used_bit = 0;
16862 unsigned HOST_WIDE_INT not_to_clear_mask;
16863
16864 if (RECORD_OR_UNION_TYPE_P (arg_type))
16865 {
16866 not_to_clear_mask
16867 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16868 padding_bits_to_clear, 0,
16869 &last_used_bit);
16870
16871
16872 /* If the 'last_used_bit' is not zero, that means we are still using a
16873 part of the last 'regno'. In such cases we must clear the trailing
16874 bits. Otherwise we are not using regno and we should mark it to be
16875 cleared. */
16876 if (last_used_bit != 0)
16877 padding_bits_to_clear[regno]
16878 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16879 else
16880 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16881 }
16882 else
16883 {
16884 not_to_clear_mask = 0;
16885 /* We are not dealing with structs or unions, so these arguments may be
16886 passed in floating-point registers too. In some cases a BLKmode is
16887 used when returning or passing arguments in multiple VFP registers. */
16888 if (GET_MODE (arg_rtx) == BLKmode)
16889 {
16890 int i, arg_regs;
16891 rtx reg;
16892
16893 /* This should really only occur when dealing with the hard-float
16894 ABI. */
16895 gcc_assert (TARGET_HARD_FLOAT_ABI);
16896
16897 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16898 {
16899 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16900 gcc_assert (REG_P (reg));
16901
16902 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16903
16904 /* If we are dealing with DF mode, make sure we don't
16905 clear either of the registers it addresses. */
16906 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16907 if (arg_regs > 1)
16908 {
16909 unsigned HOST_WIDE_INT mask;
16910 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16911 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16912 not_to_clear_mask |= mask;
16913 }
16914 }
16915 }
16916 else
16917 {
16918 /* Otherwise we can rely on the MODE to determine how many registers
16919 are being used by this argument. */
16920 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16921 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16922 if (arg_regs > 1)
16923 {
16924 unsigned HOST_WIDE_INT
16925 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16926 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16927 not_to_clear_mask |= mask;
16928 }
16929 }
16930 }
16931
16932 return not_to_clear_mask;
16933 }
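
/* The same idea is applied at the register level above: the bits for
   registers REGNO .. REGNO + NREGS - 1 of the not-to-clear mask are
   (1 << (REGNO + NREGS)) - (1 << REGNO).  Compiled-out sketch with an
   illustrative helper name.  */
#if 0
static unsigned long long
regs_used_mask (unsigned int regno, unsigned int nregs)
{
  /* Assumes REGNO + NREGS < 64 so the shifts are well defined.  */
  return (1ULL << (regno + nregs)) - (1ULL << regno);
}
#endif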
16934
16935 /* Clears caller saved registers not used to pass arguments before a
16936 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16937 registers is done in __gnu_cmse_nonsecure_call libcall.
16938 See libgcc/config/arm/cmse_nonsecure_call.S. */
16939
16940 static void
16941 cmse_nonsecure_call_clear_caller_saved (void)
16942 {
16943 basic_block bb;
16944
16945 FOR_EACH_BB_FN (bb, cfun)
16946 {
16947 rtx_insn *insn;
16948
16949 FOR_BB_INSNS (bb, insn)
16950 {
16951 uint64_t to_clear_mask, float_mask;
16952 rtx_insn *seq;
16953 rtx pat, call, unspec, reg, cleared_reg, tmp;
16954 unsigned int regno, maxregno;
16955 rtx address;
16956 CUMULATIVE_ARGS args_so_far_v;
16957 cumulative_args_t args_so_far;
16958 tree arg_type, fntype;
16959 bool using_r4, first_param = true;
16960 function_args_iterator args_iter;
16961 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16962 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16963
16964 if (!NONDEBUG_INSN_P (insn))
16965 continue;
16966
16967 if (!CALL_P (insn))
16968 continue;
16969
16970 pat = PATTERN (insn);
16971 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16972 call = XVECEXP (pat, 0, 0);
16973
16974 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16975 if (GET_CODE (call) == SET)
16976 call = SET_SRC (call);
16977
16978 /* Check if it is a cmse_nonsecure_call. */
16979 unspec = XEXP (call, 0);
16980 if (GET_CODE (unspec) != UNSPEC
16981 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16982 continue;
16983
16984 /* Determine the caller-saved registers we need to clear. */
16985 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16986 maxregno = NUM_ARG_REGS - 1;
16987 /* Only look at the caller-saved floating-point registers in case of
16988 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16989 lazy stores and loads, which clear both caller- and callee-saved
16990 registers. */
16991 if (TARGET_HARD_FLOAT_ABI)
16992 {
16993 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16994 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16995 to_clear_mask |= float_mask;
16996 maxregno = D7_VFP_REGNUM;
16997 }
16998
16999 /* Make sure the register used to hold the function address is not
17000 cleared. */
17001 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17002 gcc_assert (MEM_P (address));
17003 gcc_assert (REG_P (XEXP (address, 0)));
17004 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17005
17006 /* Set basic block of call insn so that df rescan is performed on
17007 insns inserted here. */
17008 set_block_for_insn (insn, bb);
17009 df_set_flags (DF_DEFER_INSN_RESCAN);
17010 start_sequence ();
17011
17012 /* Make sure the scheduler doesn't schedule other insns beyond
17013 here. */
17014 emit_insn (gen_blockage ());
17015
17016 /* Walk through all arguments and clear registers
17017 appropriately. */
17018 fntype = TREE_TYPE (MEM_EXPR (address));
17019 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17020 NULL_TREE);
17021 args_so_far = pack_cumulative_args (&args_so_far_v);
17022 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17023 {
17024 rtx arg_rtx;
17025 machine_mode arg_mode = TYPE_MODE (arg_type);
17026
17027 if (VOID_TYPE_P (arg_type))
17028 continue;
17029
17030 if (!first_param)
17031 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17032 true);
17033
17034 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17035 true);
17036 gcc_assert (REG_P (arg_rtx));
17037 to_clear_mask
17038 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17039 REGNO (arg_rtx),
17040 padding_bits_to_clear_ptr);
17041
17042 first_param = false;
17043 }
17044
17045 /* Clear padding bits where needed. */
17046 cleared_reg = XEXP (address, 0);
17047 reg = gen_rtx_REG (SImode, IP_REGNUM);
17048 using_r4 = false;
17049 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17050 {
17051 if (padding_bits_to_clear[regno] == 0)
17052 continue;
17053
17054 /* If this is a Thumb-1 target, copy the address of the function
17055 we are calling from 'r4' into 'ip' such that we can use r4 to
17056 clear the unused bits in the arguments. */
17057 if (TARGET_THUMB1 && !using_r4)
17058 {
17059 using_r4 = true;
17060 reg = cleared_reg;
17061 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17062 reg);
17063 }
17064
17065 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17066 emit_move_insn (reg, tmp);
17067 /* Also fill the top half of the negated
17068 padding_bits_to_clear. */
17069 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17070 {
17071 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17072 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17073 GEN_INT (16),
17074 GEN_INT (16)),
17075 tmp));
17076 }
17077
17078 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17079 gen_rtx_REG (SImode, regno),
17080 reg));
17081
17082 }
17083 if (using_r4)
17084 emit_move_insn (cleared_reg,
17085 gen_rtx_REG (SImode, IP_REGNUM));
17086
17087 /* We use right shift and left shift to clear the LSB of the address
17088 we jump to instead of using bic, to avoid having to use an extra
17089 register on Thumb-1. */
17090 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17091 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17092 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17093 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17094
17095 /* Clear all registers that could leak before doing a non-secure
17096 call. */
17097 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17098 {
17099 if (!(to_clear_mask & (1LL << regno)))
17100 continue;
17101
17102 /* If regno is an even vfp register and its successor is also to
17103 be cleared, use vmov. */
17104 if (IS_VFP_REGNUM (regno))
17105 {
17106 if (TARGET_VFP_DOUBLE
17107 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17108 && to_clear_mask & (1LL << (regno + 1)))
17109 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17110 CONST0_RTX (DFmode));
17111 else
17112 emit_move_insn (gen_rtx_REG (SFmode, regno),
17113 CONST0_RTX (SFmode));
17114 }
17115 else
17116 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17117 }
17118
17119 seq = get_insns ();
17120 end_sequence ();
17121 emit_insn_before (seq, insn);
17122
17123 }
17124 }
17125 }
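
/* The shift pair emitted above is just a scratch-register-free way of
   clearing the least significant bit of the address we are about to jump
   to; a compiled-out restatement of the arithmetic:  */
#if 0
#include <stdint.h>

static uint32_t
clear_lsb_without_scratch (uint32_t addr)
{
  addr >>= 1;	/* lsrs addr, addr, #1 */
  addr <<= 1;	/* lsls addr, addr, #1 */
  return addr;
}
#endif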
17126
17127 /* Rewrite move insn into subtract of 0 if the condition codes will
17128 be useful in the next conditional jump insn. */
17129
17130 static void
17131 thumb1_reorg (void)
17132 {
17133 basic_block bb;
17134
17135 FOR_EACH_BB_FN (bb, cfun)
17136 {
17137 rtx dest, src;
17138 rtx cmp, op0, op1, set = NULL;
17139 rtx_insn *prev, *insn = BB_END (bb);
17140 bool insn_clobbered = false;
17141
17142 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17143 insn = PREV_INSN (insn);
17144
17145 /* Find the last cbranchsi4_insn in basic block BB. */
17146 if (insn == BB_HEAD (bb)
17147 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17148 continue;
17149
17150 /* Get the register with which we are comparing. */
17151 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17152 op0 = XEXP (cmp, 0);
17153 op1 = XEXP (cmp, 1);
17154
17155 /* Check that comparison is against ZERO. */
17156 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17157 continue;
17158
17159 /* Find the first flag setting insn before INSN in basic block BB. */
17160 gcc_assert (insn != BB_HEAD (bb));
17161 for (prev = PREV_INSN (insn);
17162 (!insn_clobbered
17163 && prev != BB_HEAD (bb)
17164 && (NOTE_P (prev)
17165 || DEBUG_INSN_P (prev)
17166 || ((set = single_set (prev)) != NULL
17167 && get_attr_conds (prev) == CONDS_NOCOND)));
17168 prev = PREV_INSN (prev))
17169 {
17170 if (reg_set_p (op0, prev))
17171 insn_clobbered = true;
17172 }
17173
17174 /* Skip if op0 is clobbered by an insn other than prev. */
17175 if (insn_clobbered)
17176 continue;
17177
17178 if (!set)
17179 continue;
17180
17181 dest = SET_DEST (set);
17182 src = SET_SRC (set);
17183 if (!low_register_operand (dest, SImode)
17184 || !low_register_operand (src, SImode))
17185 continue;
17186
17187 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17188 in INSN. Both src and dest of the move insn are checked. */
17189 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17190 {
17191 dest = copy_rtx (dest);
17192 src = copy_rtx (src);
17193 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17194 PATTERN (prev) = gen_rtx_SET (dest, src);
17195 INSN_CODE (prev) = -1;
17196 /* Set test register in INSN to dest. */
17197 XEXP (cmp, 0) = copy_rtx (dest);
17198 INSN_CODE (insn) = -1;
17199 }
17200 }
17201 }
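
/* Concretely, when the last conditional branch in a block compares a low
   register against zero and that register was just copied by a simple move,
   a sequence along the lines of

	mov	r1, r0
	...
	cmp	r0, #0
	beq	.L1

   has its move rewritten as the flag-setting

	subs	r1, r0, #0

   and the comparison redirected to r1, so that the condition codes set by
   the subtract can be used by the following conditional jump.  */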
17202
17203 /* Convert instructions to their cc-clobbering variant if possible, since
17204 that allows us to use smaller encodings. */
17205
17206 static void
17207 thumb2_reorg (void)
17208 {
17209 basic_block bb;
17210 regset_head live;
17211
17212 INIT_REG_SET (&live);
17213
17214 /* We are freeing block_for_insn in the toplev to keep compatibility
17215 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17216 compute_bb_for_insn ();
17217 df_analyze ();
17218
17219 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17220
17221 FOR_EACH_BB_FN (bb, cfun)
17222 {
17223 if ((current_tune->disparage_flag_setting_t16_encodings
17224 == tune_params::DISPARAGE_FLAGS_ALL)
17225 && optimize_bb_for_speed_p (bb))
17226 continue;
17227
17228 rtx_insn *insn;
17229 Convert_Action action = SKIP;
17230 Convert_Action action_for_partial_flag_setting
17231 = ((current_tune->disparage_flag_setting_t16_encodings
17232 != tune_params::DISPARAGE_FLAGS_NEITHER)
17233 && optimize_bb_for_speed_p (bb))
17234 ? SKIP : CONV;
17235
17236 COPY_REG_SET (&live, DF_LR_OUT (bb));
17237 df_simulate_initialize_backwards (bb, &live);
17238 FOR_BB_INSNS_REVERSE (bb, insn)
17239 {
17240 if (NONJUMP_INSN_P (insn)
17241 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17242 && GET_CODE (PATTERN (insn)) == SET)
17243 {
17244 action = SKIP;
17245 rtx pat = PATTERN (insn);
17246 rtx dst = XEXP (pat, 0);
17247 rtx src = XEXP (pat, 1);
17248 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17249
17250 if (UNARY_P (src) || BINARY_P (src))
17251 op0 = XEXP (src, 0);
17252
17253 if (BINARY_P (src))
17254 op1 = XEXP (src, 1);
17255
17256 if (low_register_operand (dst, SImode))
17257 {
17258 switch (GET_CODE (src))
17259 {
17260 case PLUS:
17261 /* Adding two registers and storing the result
17262 in the first source is already a 16-bit
17263 operation. */
17264 if (rtx_equal_p (dst, op0)
17265 && register_operand (op1, SImode))
17266 break;
17267
17268 if (low_register_operand (op0, SImode))
17269 {
17270 /* ADDS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op1, SImode))
17272 action = CONV;
17273 /* ADDS <Rdn>,#<imm8> */
17274 /* SUBS <Rdn>,#<imm8> */
17275 else if (rtx_equal_p (dst, op0)
17276 && CONST_INT_P (op1)
17277 && IN_RANGE (INTVAL (op1), -255, 255))
17278 action = CONV;
17279 /* ADDS <Rd>,<Rn>,#<imm3> */
17280 /* SUBS <Rd>,<Rn>,#<imm3> */
17281 else if (CONST_INT_P (op1)
17282 && IN_RANGE (INTVAL (op1), -7, 7))
17283 action = CONV;
17284 }
17285 /* ADCS <Rd>, <Rn> */
17286 else if (GET_CODE (XEXP (src, 0)) == PLUS
17287 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17288 && low_register_operand (XEXP (XEXP (src, 0), 1),
17289 SImode)
17290 && COMPARISON_P (op1)
17291 && cc_register (XEXP (op1, 0), VOIDmode)
17292 && maybe_get_arm_condition_code (op1) == ARM_CS
17293 && XEXP (op1, 1) == const0_rtx)
17294 action = CONV;
17295 break;
17296
17297 case MINUS:
17298 /* RSBS <Rd>,<Rn>,#0
17299 Not handled here: see NEG below. */
17300 /* SUBS <Rd>,<Rn>,#<imm3>
17301 SUBS <Rdn>,#<imm8>
17302 Not handled here: see PLUS above. */
17303 /* SUBS <Rd>,<Rn>,<Rm> */
17304 if (low_register_operand (op0, SImode)
17305 && low_register_operand (op1, SImode))
17306 action = CONV;
17307 break;
17308
17309 case MULT:
17310 /* MULS <Rdm>,<Rn>,<Rdm>
17311 As an exception to the rule, this is only used
17312 when optimizing for size since MULS is slow on all
17313 known implementations. We do not even want to use
17314 MULS in cold code, if optimizing for speed, so we
17315 test the global flag here. */
17316 if (!optimize_size)
17317 break;
17318 /* Fall through. */
17319 case AND:
17320 case IOR:
17321 case XOR:
17322 /* ANDS <Rdn>,<Rm> */
17323 if (rtx_equal_p (dst, op0)
17324 && low_register_operand (op1, SImode))
17325 action = action_for_partial_flag_setting;
17326 else if (rtx_equal_p (dst, op1)
17327 && low_register_operand (op0, SImode))
17328 action = action_for_partial_flag_setting == SKIP
17329 ? SKIP : SWAP_CONV;
17330 break;
17331
17332 case ASHIFTRT:
17333 case ASHIFT:
17334 case LSHIFTRT:
17335 /* ASRS <Rdn>,<Rm> */
17336 /* LSRS <Rdn>,<Rm> */
17337 /* LSLS <Rdn>,<Rm> */
17338 if (rtx_equal_p (dst, op0)
17339 && low_register_operand (op1, SImode))
17340 action = action_for_partial_flag_setting;
17341 /* ASRS <Rd>,<Rm>,#<imm5> */
17342 /* LSRS <Rd>,<Rm>,#<imm5> */
17343 /* LSLS <Rd>,<Rm>,#<imm5> */
17344 else if (low_register_operand (op0, SImode)
17345 && CONST_INT_P (op1)
17346 && IN_RANGE (INTVAL (op1), 0, 31))
17347 action = action_for_partial_flag_setting;
17348 break;
17349
17350 case ROTATERT:
17351 /* RORS <Rdn>,<Rm> */
17352 if (rtx_equal_p (dst, op0)
17353 && low_register_operand (op1, SImode))
17354 action = action_for_partial_flag_setting;
17355 break;
17356
17357 case NOT:
17358 /* MVNS <Rd>,<Rm> */
17359 if (low_register_operand (op0, SImode))
17360 action = action_for_partial_flag_setting;
17361 break;
17362
17363 case NEG:
17364 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17365 if (low_register_operand (op0, SImode))
17366 action = CONV;
17367 break;
17368
17369 case CONST_INT:
17370 /* MOVS <Rd>,#<imm8> */
17371 if (CONST_INT_P (src)
17372 && IN_RANGE (INTVAL (src), 0, 255))
17373 action = action_for_partial_flag_setting;
17374 break;
17375
17376 case REG:
17377 /* MOVS and MOV<c> with registers have different
17378 encodings, so are not relevant here. */
17379 break;
17380
17381 default:
17382 break;
17383 }
17384 }
17385
17386 if (action != SKIP)
17387 {
17388 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17389 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17390 rtvec vec;
17391
17392 if (action == SWAP_CONV)
17393 {
17394 src = copy_rtx (src);
17395 XEXP (src, 0) = op1;
17396 XEXP (src, 1) = op0;
17397 pat = gen_rtx_SET (dst, src);
17398 vec = gen_rtvec (2, pat, clobber);
17399 }
17400 else /* action == CONV */
17401 vec = gen_rtvec (2, pat, clobber);
17402
17403 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17404 INSN_CODE (insn) = -1;
17405 }
17406 }
17407
17408 if (NONDEBUG_INSN_P (insn))
17409 df_simulate_one_insn_backwards (bb, insn, &live);
17410 }
17411 }
17412
17413 CLEAR_REG_SET (&live);
17414 }
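
/* For example, when the condition codes are dead at that point, a plain

	add	r0, r1, r2

   on low registers gains a CC clobber and becomes

	adds	r0, r1, r2

   which has a 16-bit encoding (the ADDS <Rd>,<Rn>,<Rm> case above).  */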
17415
17416 /* GCC puts the pool in the wrong place for ARM, since we can only
17417 load addresses a limited distance around the pc. We do some
17418 special munging to move the constant pool values to the correct
17419 point in the code. */
17420 static void
17421 arm_reorg (void)
17422 {
17423 rtx_insn *insn;
17424 HOST_WIDE_INT address = 0;
17425 Mfix * fix;
17426
17427 if (use_cmse)
17428 cmse_nonsecure_call_clear_caller_saved ();
17429 if (TARGET_THUMB1)
17430 thumb1_reorg ();
17431 else if (TARGET_THUMB2)
17432 thumb2_reorg ();
17433
17434 /* Ensure all insns that must be split have been split at this point.
17435 Otherwise, the pool placement code below may compute incorrect
17436 insn lengths. Note that when optimizing, all insns have already
17437 been split at this point. */
17438 if (!optimize)
17439 split_all_insns_noflow ();
17440
17441 /* Make sure we do not attempt to create a literal pool even though it should
17442 no longer be necessary to create any. */
17443 if (arm_disable_literal_pool)
17444 return;
17445
17446 minipool_fix_head = minipool_fix_tail = NULL;
17447
17448 /* The first insn must always be a note, or the code below won't
17449 scan it properly. */
17450 insn = get_insns ();
17451 gcc_assert (NOTE_P (insn));
17452 minipool_pad = 0;
17453
17454 /* Scan all the insns and record the operands that will need fixing. */
17455 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17456 {
17457 if (BARRIER_P (insn))
17458 push_minipool_barrier (insn, address);
17459 else if (INSN_P (insn))
17460 {
17461 rtx_jump_table_data *table;
17462
17463 note_invalid_constants (insn, address, true);
17464 address += get_attr_length (insn);
17465
17466 /* If the insn is a vector jump, add the size of the table
17467 and skip the table. */
17468 if (tablejump_p (insn, NULL, &table))
17469 {
17470 address += get_jump_table_size (table);
17471 insn = table;
17472 }
17473 }
17474 else if (LABEL_P (insn))
17475 /* Add the worst-case padding due to alignment. We don't add
17476 the _current_ padding because the minipool insertions
17477 themselves might change it. */
17478 address += get_label_padding (insn);
17479 }
17480
17481 fix = minipool_fix_head;
17482
17483 /* Now scan the fixups and perform the required changes. */
17484 while (fix)
17485 {
17486 Mfix * ftmp;
17487 Mfix * fdel;
17488 Mfix * last_added_fix;
17489 Mfix * last_barrier = NULL;
17490 Mfix * this_fix;
17491
17492 /* Skip any further barriers before the next fix. */
17493 while (fix && BARRIER_P (fix->insn))
17494 fix = fix->next;
17495
17496 /* No more fixes. */
17497 if (fix == NULL)
17498 break;
17499
17500 last_added_fix = NULL;
17501
17502 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17503 {
17504 if (BARRIER_P (ftmp->insn))
17505 {
17506 if (ftmp->address >= minipool_vector_head->max_address)
17507 break;
17508
17509 last_barrier = ftmp;
17510 }
17511 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17512 break;
17513
17514 last_added_fix = ftmp; /* Keep track of the last fix added. */
17515 }
17516
17517 /* If we found a barrier, drop back to that; any fixes that we
17518 could have reached but come after the barrier will now go in
17519 the next mini-pool. */
17520 if (last_barrier != NULL)
17521 {
17522 /* Reduce the refcount for those fixes that won't go into this
17523 pool after all. */
17524 for (fdel = last_barrier->next;
17525 fdel && fdel != ftmp;
17526 fdel = fdel->next)
17527 {
17528 fdel->minipool->refcount--;
17529 fdel->minipool = NULL;
17530 }
17531
17532 ftmp = last_barrier;
17533 }
17534 else
17535 {
17536 /* ftmp is the first fix that we can't fit into this pool and
17537 there are no natural barriers that we could use. Insert a
17538 new barrier in the code somewhere between the previous
17539 fix and this one, and arrange to jump around it. */
17540 HOST_WIDE_INT max_address;
17541
17542 /* The last item on the list of fixes must be a barrier, so
17543 we can never run off the end of the list of fixes without
17544 last_barrier being set. */
17545 gcc_assert (ftmp);
17546
17547 max_address = minipool_vector_head->max_address;
17548 /* Check that there isn't another fix that is in range that
17549 we couldn't fit into this pool because the pool was
17550 already too large: we need to put the pool before such an
17551 instruction. The pool itself may come just after the
17552 fix because create_fix_barrier also allows space for a
17553 jump instruction. */
17554 if (ftmp->address < max_address)
17555 max_address = ftmp->address + 1;
17556
17557 last_barrier = create_fix_barrier (last_added_fix, max_address);
17558 }
17559
17560 assign_minipool_offsets (last_barrier);
17561
17562 while (ftmp)
17563 {
17564 if (!BARRIER_P (ftmp->insn)
17565 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17566 == NULL))
17567 break;
17568
17569 ftmp = ftmp->next;
17570 }
17571
17572 /* Scan over the fixes we have identified for this pool, fixing them
17573 up and adding the constants to the pool itself. */
17574 for (this_fix = fix; this_fix && ftmp != this_fix;
17575 this_fix = this_fix->next)
17576 if (!BARRIER_P (this_fix->insn))
17577 {
17578 rtx addr
17579 = plus_constant (Pmode,
17580 gen_rtx_LABEL_REF (VOIDmode,
17581 minipool_vector_label),
17582 this_fix->minipool->offset);
17583 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17584 }
17585
17586 dump_minipool (last_barrier->insn);
17587 fix = ftmp;
17588 }
17589
17590 /* From now on we must synthesize any constants that we can't handle
17591 directly. This can happen if the RTL gets split during final
17592 instruction generation. */
17593 cfun->machine->after_arm_reorg = 1;
17594
17595 /* Free the minipool memory. */
17596 obstack_free (&minipool_obstack, minipool_startobj);
17597 }
17598 \f
17599 /* Routines to output assembly language. */
17600
17601 /* Return string representation of passed in real value. */
17602 static const char *
17603 fp_const_from_val (REAL_VALUE_TYPE *r)
17604 {
17605 if (!fp_consts_inited)
17606 init_fp_table ();
17607
17608 gcc_assert (real_equal (r, &value_fp0));
17609 return "0";
17610 }
17611
17612 /* OPERANDS[0] is the entire list of insns that constitute pop,
17613 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17614 is in the list, UPDATE is true iff the list contains explicit
17615 update of base register. */
17616 void
17617 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17618 bool update)
17619 {
17620 int i;
17621 char pattern[100];
17622 int offset;
17623 const char *conditional;
17624 int num_saves = XVECLEN (operands[0], 0);
17625 unsigned int regno;
17626 unsigned int regno_base = REGNO (operands[1]);
17627 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17628
17629 offset = 0;
17630 offset += update ? 1 : 0;
17631 offset += return_pc ? 1 : 0;
17632
17633 /* Is the base register in the list? */
17634 for (i = offset; i < num_saves; i++)
17635 {
17636 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17637 /* If SP is in the list, then the base register must be SP. */
17638 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17639 /* If base register is in the list, there must be no explicit update. */
17640 if (regno == regno_base)
17641 gcc_assert (!update);
17642 }
17643
17644 conditional = reverse ? "%?%D0" : "%?%d0";
17645 /* Can't use POP if returning from an interrupt. */
17646 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17647 sprintf (pattern, "pop%s\t{", conditional);
17648 else
17649 {
17650 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17651 It's just a convention; their semantics are identical. */
17652 if (regno_base == SP_REGNUM)
17653 sprintf (pattern, "ldmfd%s\t", conditional);
17654 else if (update)
17655 sprintf (pattern, "ldmia%s\t", conditional);
17656 else
17657 sprintf (pattern, "ldm%s\t", conditional);
17658
17659 strcat (pattern, reg_names[regno_base]);
17660 if (update)
17661 strcat (pattern, "!, {");
17662 else
17663 strcat (pattern, ", {");
17664 }
17665
17666 /* Output the first destination register. */
17667 strcat (pattern,
17668 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17669
17670 /* Output the rest of the destination registers. */
17671 for (i = offset + 1; i < num_saves; i++)
17672 {
17673 strcat (pattern, ", ");
17674 strcat (pattern,
17675 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17676 }
17677
17678 strcat (pattern, "}");
17679
17680 if (interrupt_p && return_pc)
17681 strcat (pattern, "^");
17682
17683 output_asm_insn (pattern, &cond);
17684 }
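
/* The patterns built above produce assembly along the lines of

	pop	{r4, r5, pc}
	ldmfd	sp!, {r4, r5, pc}^
	ldmia	r3!, {r4, r5}
	ldm	r3, {r4, r5}

   where the trailing '^' appears only when returning from an interrupt
   with the PC in the list.  */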
17685
17686
17687 /* Output the assembly for a store multiple. */
17688
17689 const char *
17690 vfp_output_vstmd (rtx * operands)
17691 {
17692 char pattern[100];
17693 int p;
17694 int base;
17695 int i;
17696 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17697 ? XEXP (operands[0], 0)
17698 : XEXP (XEXP (operands[0], 0), 0);
17699 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17700
17701 if (push_p)
17702 strcpy (pattern, "vpush%?.64\t{%P1");
17703 else
17704 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17705
17706 p = strlen (pattern);
17707
17708 gcc_assert (REG_P (operands[1]));
17709
17710 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17711 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17712 {
17713 p += sprintf (&pattern[p], ", d%d", base + i);
17714 }
17715 strcpy (&pattern[p], "}");
17716
17717 output_asm_insn (pattern, operands);
17718 return "";
17719 }
17720
17721
17722 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17723 number of bytes pushed. */
17724
17725 static int
17726 vfp_emit_fstmd (int base_reg, int count)
17727 {
17728 rtx par;
17729 rtx dwarf;
17730 rtx tmp, reg;
17731 int i;
17732
17733 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17734 register pairs are stored by a store multiple insn. We avoid this
17735 by pushing an extra pair. */
17736 if (count == 2 && !arm_arch6)
17737 {
17738 if (base_reg == LAST_VFP_REGNUM - 3)
17739 base_reg -= 2;
17740 count++;
17741 }
17742
17743 /* FSTMD may not store more than 16 doubleword registers at once. Split
17744 larger stores into multiple parts (up to a maximum of two, in
17745 practice). */
17746 if (count > 16)
17747 {
17748 int saved;
17749 /* NOTE: base_reg is an internal register number, so each D register
17750 counts as 2. */
17751 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17752 saved += vfp_emit_fstmd (base_reg, 16);
17753 return saved;
17754 }
17755
17756 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17757 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17758
17759 reg = gen_rtx_REG (DFmode, base_reg);
17760 base_reg += 2;
17761
17762 XVECEXP (par, 0, 0)
17763 = gen_rtx_SET (gen_frame_mem
17764 (BLKmode,
17765 gen_rtx_PRE_MODIFY (Pmode,
17766 stack_pointer_rtx,
17767 plus_constant
17768 (Pmode, stack_pointer_rtx,
17769 - (count * 8)))
17770 ),
17771 gen_rtx_UNSPEC (BLKmode,
17772 gen_rtvec (1, reg),
17773 UNSPEC_PUSH_MULT));
17774
17775 tmp = gen_rtx_SET (stack_pointer_rtx,
17776 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17777 RTX_FRAME_RELATED_P (tmp) = 1;
17778 XVECEXP (dwarf, 0, 0) = tmp;
17779
17780 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17781 RTX_FRAME_RELATED_P (tmp) = 1;
17782 XVECEXP (dwarf, 0, 1) = tmp;
17783
17784 for (i = 1; i < count; i++)
17785 {
17786 reg = gen_rtx_REG (DFmode, base_reg);
17787 base_reg += 2;
17788 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17789
17790 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17791 plus_constant (Pmode,
17792 stack_pointer_rtx,
17793 i * 8)),
17794 reg);
17795 RTX_FRAME_RELATED_P (tmp) = 1;
17796 XVECEXP (dwarf, 0, i + 1) = tmp;
17797 }
17798
17799 par = emit_insn (par);
17800 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17801 RTX_FRAME_RELATED_P (par) = 1;
17802
17803 return count * 8;
17804 }
17805
17806 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17807 has the cmse_nonsecure_call attribute; return false otherwise. */
17808
17809 bool
17810 detect_cmse_nonsecure_call (tree addr)
17811 {
17812 if (!addr)
17813 return FALSE;
17814
17815 tree fntype = TREE_TYPE (addr);
17816 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17817 TYPE_ATTRIBUTES (fntype)))
17818 return TRUE;
17819 return FALSE;
17820 }
17821
17822
17823 /* Emit a call instruction with pattern PAT. ADDR is the address of
17824 the call target. */
17825
17826 void
17827 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17828 {
17829 rtx insn;
17830
17831 insn = emit_call_insn (pat);
17832
17833 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17834 If the call might use such an entry, add a use of the PIC register
17835 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17836 if (TARGET_VXWORKS_RTP
17837 && flag_pic
17838 && !sibcall
17839 && GET_CODE (addr) == SYMBOL_REF
17840 && (SYMBOL_REF_DECL (addr)
17841 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17842 : !SYMBOL_REF_LOCAL_P (addr)))
17843 {
17844 require_pic_register ();
17845 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17846 }
17847
17848 if (TARGET_AAPCS_BASED)
17849 {
17850 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17851 linker. We need to add an IP clobber to allow setting
17852 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17853 is not needed since it's a fixed register. */
17854 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17855 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17856 }
17857 }
17858
17859 /* Output a 'call' insn. */
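/* For example (illustration only): on a pre-ARMv5 target with
   interworking, a call through r2 is emitted as

	mov	lr, pc
	bx	r2

   (or "mov pc, r2" without interworking or ARMv4T), and a call through
   lr is first redirected via ip.  */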
17860 const char *
17861 output_call (rtx *operands)
17862 {
17863 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17864
17865 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17866 if (REGNO (operands[0]) == LR_REGNUM)
17867 {
17868 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17869 output_asm_insn ("mov%?\t%0, %|lr", operands);
17870 }
17871
17872 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17873
17874 if (TARGET_INTERWORK || arm_arch4t)
17875 output_asm_insn ("bx%?\t%0", operands);
17876 else
17877 output_asm_insn ("mov%?\t%|pc, %0", operands);
17878
17879 return "";
17880 }
17881
17882 /* Output a move of a long double from ARM registers to ARM registers.
17883 OPERANDS[0] is the destination.
17884 OPERANDS[1] is the source. */
17885 const char *
17886 output_mov_long_double_arm_from_arm (rtx *operands)
17887 {
17888 /* We have to be careful here because the two might overlap. */
17889 int dest_start = REGNO (operands[0]);
17890 int src_start = REGNO (operands[1]);
17891 rtx ops[2];
17892 int i;
17893
17894 if (dest_start < src_start)
17895 {
17896 for (i = 0; i < 3; i++)
17897 {
17898 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17899 ops[1] = gen_rtx_REG (SImode, src_start + i);
17900 output_asm_insn ("mov%?\t%0, %1", ops);
17901 }
17902 }
17903 else
17904 {
17905 for (i = 2; i >= 0; i--)
17906 {
17907 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17908 ops[1] = gen_rtx_REG (SImode, src_start + i);
17909 output_asm_insn ("mov%?\t%0, %1", ops);
17910 }
17911 }
17912
17913 return "";
17914 }
17915
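/* Load the value SRC into DEST using a pair of sets: for a constant, a
   set of the low 16 bits followed, when the upper half is non-zero, by an
   insertion of the upper 16 bits (typically assembled as movw/movt);
   otherwise a HIGH/LO_SUM pair.  For example (illustration only),
   SRC = 0x12345678 is emitted as a set of DEST to 0x5678 followed by a
   zero_extract set of the top half to 0x1234, with a REG_EQUAL note
   recording the full value.  */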
17916 void
17917 arm_emit_movpair (rtx dest, rtx src)
17918 {
17919 /* If the src is an immediate, simplify it. */
17920 if (CONST_INT_P (src))
17921 {
17922 HOST_WIDE_INT val = INTVAL (src);
17923 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17924 if ((val >> 16) & 0x0000ffff)
17925 {
17926 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17927 GEN_INT (16)),
17928 GEN_INT ((val >> 16) & 0x0000ffff));
17929 rtx_insn *insn = get_last_insn ();
17930 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17931 }
17932 return;
17933 }
17934 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17935 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17936 rtx_insn *insn = get_last_insn ();
17937 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17938 }
17939
17940 /* Output a move between double words. It must be REG<-MEM
17941 or MEM<-REG. */
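/* For example (illustration only): a register-addressed DImode load is
   emitted as "ldrd r0, [r4]" when LDRD is available, or as
   "ldmia r4, {r0, r1}" otherwise.  */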
17942 const char *
17943 output_move_double (rtx *operands, bool emit, int *count)
17944 {
17945 enum rtx_code code0 = GET_CODE (operands[0]);
17946 enum rtx_code code1 = GET_CODE (operands[1]);
17947 rtx otherops[3];
17948 if (count)
17949 *count = 1;
17950
17951 /* The only case when this might happen is when
17952 you are looking at the length of a DImode instruction
17953 that has an invalid constant in it. */
17954 if (code0 == REG && code1 != MEM)
17955 {
17956 gcc_assert (!emit);
17957 *count = 2;
17958 return "";
17959 }
17960
17961 if (code0 == REG)
17962 {
17963 unsigned int reg0 = REGNO (operands[0]);
17964
17965 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17966
17967 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17968
17969 switch (GET_CODE (XEXP (operands[1], 0)))
17970 {
17971 case REG:
17972
17973 if (emit)
17974 {
17975 if (TARGET_LDRD
17976 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17977 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17978 else
17979 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17980 }
17981 break;
17982
17983 case PRE_INC:
17984 gcc_assert (TARGET_LDRD);
17985 if (emit)
17986 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17987 break;
17988
17989 case PRE_DEC:
17990 if (emit)
17991 {
17992 if (TARGET_LDRD)
17993 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17994 else
17995 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17996 }
17997 break;
17998
17999 case POST_INC:
18000 if (emit)
18001 {
18002 if (TARGET_LDRD)
18003 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18004 else
18005 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18006 }
18007 break;
18008
18009 case POST_DEC:
18010 gcc_assert (TARGET_LDRD);
18011 if (emit)
18012 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18013 break;
18014
18015 case PRE_MODIFY:
18016 case POST_MODIFY:
18017 /* Autoincrement addressing modes should never have overlapping
18018 base and destination registers, and overlapping index registers
18019 are already prohibited, so this doesn't need to worry about
18020 fix_cm3_ldrd. */
18021 otherops[0] = operands[0];
18022 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18023 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18024
18025 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18026 {
18027 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18028 {
18029 /* Registers overlap so split out the increment. */
18030 if (emit)
18031 {
18032 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18033 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18034 }
18035 if (count)
18036 *count = 2;
18037 }
18038 else
18039 {
18040 /* Use a single insn if we can.
18041 FIXME: IWMMXT allows offsets larger than ldrd can
18042 handle, fix these up with a pair of ldr. */
18043 if (TARGET_THUMB2
18044 || !CONST_INT_P (otherops[2])
18045 || (INTVAL (otherops[2]) > -256
18046 && INTVAL (otherops[2]) < 256))
18047 {
18048 if (emit)
18049 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18050 }
18051 else
18052 {
18053 if (emit)
18054 {
18055 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18056 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18057 }
18058 if (count)
18059 *count = 2;
18060
18061 }
18062 }
18063 }
18064 else
18065 {
18066 /* Use a single insn if we can.
18067 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18068 fix these up with a pair of ldr. */
18069 if (TARGET_THUMB2
18070 || !CONST_INT_P (otherops[2])
18071 || (INTVAL (otherops[2]) > -256
18072 && INTVAL (otherops[2]) < 256))
18073 {
18074 if (emit)
18075 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18076 }
18077 else
18078 {
18079 if (emit)
18080 {
18081 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18082 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18083 }
18084 if (count)
18085 *count = 2;
18086 }
18087 }
18088 break;
18089
18090 case LABEL_REF:
18091 case CONST:
18092 /* We might be able to use ldrd %0, %1 here. However the range is
18093 different to ldr/adr, and it is broken on some ARMv7-M
18094 implementations. */
18095 /* Use the second register of the pair to avoid problematic
18096 overlap. */
18097 otherops[1] = operands[1];
18098 if (emit)
18099 output_asm_insn ("adr%?\t%0, %1", otherops);
18100 operands[1] = otherops[0];
18101 if (emit)
18102 {
18103 if (TARGET_LDRD)
18104 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18105 else
18106 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18107 }
18108
18109 if (count)
18110 *count = 2;
18111 break;
18112
18113 /* ??? This needs checking for thumb2. */
18114 default:
18115 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18116 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18117 {
18118 otherops[0] = operands[0];
18119 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18120 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18121
18122 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18123 {
18124 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18125 {
18126 switch ((int) INTVAL (otherops[2]))
18127 {
18128 case -8:
18129 if (emit)
18130 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18131 return "";
18132 case -4:
18133 if (TARGET_THUMB2)
18134 break;
18135 if (emit)
18136 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18137 return "";
18138 case 4:
18139 if (TARGET_THUMB2)
18140 break;
18141 if (emit)
18142 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18143 return "";
18144 }
18145 }
18146 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18147 operands[1] = otherops[0];
18148 if (TARGET_LDRD
18149 && (REG_P (otherops[2])
18150 || TARGET_THUMB2
18151 || (CONST_INT_P (otherops[2])
18152 && INTVAL (otherops[2]) > -256
18153 && INTVAL (otherops[2]) < 256)))
18154 {
18155 if (reg_overlap_mentioned_p (operands[0],
18156 otherops[2]))
18157 {
18158 /* Swap base and index registers over to
18159 avoid a conflict. */
18160 std::swap (otherops[1], otherops[2]);
18161 }
18162 /* If both registers conflict, it will usually
18163 have been fixed by a splitter. */
18164 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18165 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18166 {
18167 if (emit)
18168 {
18169 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18170 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18171 }
18172 if (count)
18173 *count = 2;
18174 }
18175 else
18176 {
18177 otherops[0] = operands[0];
18178 if (emit)
18179 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18180 }
18181 return "";
18182 }
18183
18184 if (CONST_INT_P (otherops[2]))
18185 {
18186 if (emit)
18187 {
18188 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18189 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18190 else
18191 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18192 }
18193 }
18194 else
18195 {
18196 if (emit)
18197 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18198 }
18199 }
18200 else
18201 {
18202 if (emit)
18203 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18204 }
18205
18206 if (count)
18207 *count = 2;
18208
18209 if (TARGET_LDRD)
18210 return "ldrd%?\t%0, [%1]";
18211
18212 return "ldmia%?\t%1, %M0";
18213 }
18214 else
18215 {
18216 otherops[1] = adjust_address (operands[1], SImode, 4);
18217 /* Take care of overlapping base/data reg. */
18218 if (reg_mentioned_p (operands[0], operands[1]))
18219 {
18220 if (emit)
18221 {
18222 output_asm_insn ("ldr%?\t%0, %1", otherops);
18223 output_asm_insn ("ldr%?\t%0, %1", operands);
18224 }
18225 if (count)
18226 *count = 2;
18227
18228 }
18229 else
18230 {
18231 if (emit)
18232 {
18233 output_asm_insn ("ldr%?\t%0, %1", operands);
18234 output_asm_insn ("ldr%?\t%0, %1", otherops);
18235 }
18236 if (count)
18237 *count = 2;
18238 }
18239 }
18240 }
18241 }
18242 else
18243 {
18244 /* Constraints should ensure this. */
18245 gcc_assert (code0 == MEM && code1 == REG);
18246 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18247 || (TARGET_ARM && TARGET_LDRD));
18248
18249 switch (GET_CODE (XEXP (operands[0], 0)))
18250 {
18251 case REG:
18252 if (emit)
18253 {
18254 if (TARGET_LDRD)
18255 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18256 else
18257 output_asm_insn ("stm%?\t%m0, %M1", operands);
18258 }
18259 break;
18260
18261 case PRE_INC:
18262 gcc_assert (TARGET_LDRD);
18263 if (emit)
18264 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18265 break;
18266
18267 case PRE_DEC:
18268 if (emit)
18269 {
18270 if (TARGET_LDRD)
18271 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18272 else
18273 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18274 }
18275 break;
18276
18277 case POST_INC:
18278 if (emit)
18279 {
18280 if (TARGET_LDRD)
18281 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18282 else
18283 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18284 }
18285 break;
18286
18287 case POST_DEC:
18288 gcc_assert (TARGET_LDRD);
18289 if (emit)
18290 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18291 break;
18292
18293 case PRE_MODIFY:
18294 case POST_MODIFY:
18295 otherops[0] = operands[1];
18296 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18297 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18298
18299 /* IWMMXT allows offsets larger than ldrd can handle,
18300 fix these up with a pair of ldr. */
18301 if (!TARGET_THUMB2
18302 && CONST_INT_P (otherops[2])
18303 && (INTVAL(otherops[2]) <= -256
18304 || INTVAL(otherops[2]) >= 256))
18305 {
18306 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18307 {
18308 if (emit)
18309 {
18310 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18311 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18312 }
18313 if (count)
18314 *count = 2;
18315 }
18316 else
18317 {
18318 if (emit)
18319 {
18320 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18321 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18322 }
18323 if (count)
18324 *count = 2;
18325 }
18326 }
18327 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18328 {
18329 if (emit)
18330 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18331 }
18332 else
18333 {
18334 if (emit)
18335 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18336 }
18337 break;
18338
18339 case PLUS:
18340 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18341 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18342 {
18343 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18344 {
18345 case -8:
18346 if (emit)
18347 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18348 return "";
18349
18350 case -4:
18351 if (TARGET_THUMB2)
18352 break;
18353 if (emit)
18354 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18355 return "";
18356
18357 case 4:
18358 if (TARGET_THUMB2)
18359 break;
18360 if (emit)
18361 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18362 return "";
18363 }
18364 }
18365 if (TARGET_LDRD
18366 && (REG_P (otherops[2])
18367 || TARGET_THUMB2
18368 || (CONST_INT_P (otherops[2])
18369 && INTVAL (otherops[2]) > -256
18370 && INTVAL (otherops[2]) < 256)))
18371 {
18372 otherops[0] = operands[1];
18373 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18374 if (emit)
18375 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18376 return "";
18377 }
18378 /* Fall through */
18379
18380 default:
18381 otherops[0] = adjust_address (operands[0], SImode, 4);
18382 otherops[1] = operands[1];
18383 if (emit)
18384 {
18385 output_asm_insn ("str%?\t%1, %0", operands);
18386 output_asm_insn ("str%?\t%H1, %0", otherops);
18387 }
18388 if (count)
18389 *count = 2;
18390 }
18391 }
18392
18393 return "";
18394 }
18395
18396 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18397 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18398
18399 const char *
18400 output_move_quad (rtx *operands)
18401 {
18402 if (REG_P (operands[0]))
18403 {
18404 /* Load, or reg->reg move. */
18405
18406 if (MEM_P (operands[1]))
18407 {
18408 switch (GET_CODE (XEXP (operands[1], 0)))
18409 {
18410 case REG:
18411 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18412 break;
18413
18414 case LABEL_REF:
18415 case CONST:
18416 output_asm_insn ("adr%?\t%0, %1", operands);
18417 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18418 break;
18419
18420 default:
18421 gcc_unreachable ();
18422 }
18423 }
18424 else
18425 {
18426 rtx ops[2];
18427 int dest, src, i;
18428
18429 gcc_assert (REG_P (operands[1]));
18430
18431 dest = REGNO (operands[0]);
18432 src = REGNO (operands[1]);
18433
18434 /* This seems pretty dumb, but hopefully GCC won't try to do it
18435 very often. */
18436 if (dest < src)
18437 for (i = 0; i < 4; i++)
18438 {
18439 ops[0] = gen_rtx_REG (SImode, dest + i);
18440 ops[1] = gen_rtx_REG (SImode, src + i);
18441 output_asm_insn ("mov%?\t%0, %1", ops);
18442 }
18443 else
18444 for (i = 3; i >= 0; i--)
18445 {
18446 ops[0] = gen_rtx_REG (SImode, dest + i);
18447 ops[1] = gen_rtx_REG (SImode, src + i);
18448 output_asm_insn ("mov%?\t%0, %1", ops);
18449 }
18450 }
18451 }
18452 else
18453 {
18454 gcc_assert (MEM_P (operands[0]));
18455 gcc_assert (REG_P (operands[1]));
18456 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18457
18458 switch (GET_CODE (XEXP (operands[0], 0)))
18459 {
18460 case REG:
18461 output_asm_insn ("stm%?\t%m0, %M1", operands);
18462 break;
18463
18464 default:
18465 gcc_unreachable ();
18466 }
18467 }
18468
18469 return "";
18470 }
18471
18472 /* Output a VFP load or store instruction. */
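/* For example (illustration only): a double-precision load from the
   stack is emitted as "vldr.64 d8, [sp, #8]", while a post-incremented
   single-precision store becomes "vstmia.32 r4!, {s0}".  */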
18473
18474 const char *
18475 output_move_vfp (rtx *operands)
18476 {
18477 rtx reg, mem, addr, ops[2];
18478 int load = REG_P (operands[0]);
18479 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18480 int sp = (!TARGET_VFP_FP16INST
18481 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18482 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18483 const char *templ;
18484 char buff[50];
18485 machine_mode mode;
18486
18487 reg = operands[!load];
18488 mem = operands[load];
18489
18490 mode = GET_MODE (reg);
18491
18492 gcc_assert (REG_P (reg));
18493 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18494 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18495 || mode == SFmode
18496 || mode == DFmode
18497 || mode == HImode
18498 || mode == SImode
18499 || mode == DImode
18500 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18501 gcc_assert (MEM_P (mem));
18502
18503 addr = XEXP (mem, 0);
18504
18505 switch (GET_CODE (addr))
18506 {
18507 case PRE_DEC:
18508 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18509 ops[0] = XEXP (addr, 0);
18510 ops[1] = reg;
18511 break;
18512
18513 case POST_INC:
18514 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18515 ops[0] = XEXP (addr, 0);
18516 ops[1] = reg;
18517 break;
18518
18519 default:
18520 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18521 ops[0] = reg;
18522 ops[1] = mem;
18523 break;
18524 }
18525
18526 sprintf (buff, templ,
18527 load ? "ld" : "st",
18528 dp ? "64" : sp ? "32" : "16",
18529 dp ? "P" : "",
18530 integer_p ? "\t%@ int" : "");
18531 output_asm_insn (buff, ops);
18532
18533 return "";
18534 }
18535
18536 /* Output a Neon double-word or quad-word load or store, or a load
18537 or store for larger structure modes.
18538
18539 WARNING: The ordering of elements is weird in big-endian mode,
18540 because the EABI requires that vectors stored in memory appear
18541 as though they were stored by a VSTM instruction.
18542 GCC RTL defines element ordering based on in-memory order.
18543 This can be different from the architectural ordering of elements
18544 within a NEON register. The intrinsics defined in arm_neon.h use the
18545 NEON register element ordering, not the GCC RTL element ordering.
18546
18547 For example, the in-memory ordering of a big-endian quadword
18548 vector with 16-bit elements when stored from register pair {d0,d1}
18549 will be (lowest address first, d0[N] is NEON register element N):
18550
18551 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18552
18553 When necessary, quadword registers (dN, dN+1) are moved to ARM
18554 registers starting at rN, in the order:
18555
18556 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18557
18558 So that STM/LDM can be used on vectors in ARM registers, and the
18559 same memory layout will result as if VSTM/VLDM were used.
18560
18561 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18562 possible, which allows use of appropriate alignment tags.
18563 Note that the choice of "64" is independent of the actual vector
18564 element size; this size simply ensures that the behavior is
18565 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18566
18567 Due to limitations of those instructions, use of VST1.64/VLD1.64
18568 is not possible if:
18569 - the address contains PRE_DEC, or
18570 - the mode refers to more than 4 double-word registers
18571
18572 In those cases, it would be possible to replace VSTM/VLDM by a
18573 sequence of instructions; this is not currently implemented since
18574 this is not certain to actually improve performance. */
18575
18576 const char *
18577 output_move_neon (rtx *operands)
18578 {
18579 rtx reg, mem, addr, ops[2];
18580 int regno, nregs, load = REG_P (operands[0]);
18581 const char *templ;
18582 char buff[50];
18583 machine_mode mode;
18584
18585 reg = operands[!load];
18586 mem = operands[load];
18587
18588 mode = GET_MODE (reg);
18589
18590 gcc_assert (REG_P (reg));
18591 regno = REGNO (reg);
18592 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18593 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18594 || NEON_REGNO_OK_FOR_QUAD (regno));
18595 gcc_assert (VALID_NEON_DREG_MODE (mode)
18596 || VALID_NEON_QREG_MODE (mode)
18597 || VALID_NEON_STRUCT_MODE (mode));
18598 gcc_assert (MEM_P (mem));
18599
18600 addr = XEXP (mem, 0);
18601
18602 /* Strip off const from addresses like (const (plus (...))). */
18603 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18604 addr = XEXP (addr, 0);
18605
18606 switch (GET_CODE (addr))
18607 {
18608 case POST_INC:
18609 /* We have to use vldm / vstm for too-large modes. */
18610 if (nregs > 4)
18611 {
18612 templ = "v%smia%%?\t%%0!, %%h1";
18613 ops[0] = XEXP (addr, 0);
18614 }
18615 else
18616 {
18617 templ = "v%s1.64\t%%h1, %%A0";
18618 ops[0] = mem;
18619 }
18620 ops[1] = reg;
18621 break;
18622
18623 case PRE_DEC:
18624 /* We have to use vldm / vstm in this case, since there is no
18625 pre-decrement form of the vld1 / vst1 instructions. */
18626 templ = "v%smdb%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18628 ops[1] = reg;
18629 break;
18630
18631 case POST_MODIFY:
18632 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18633 gcc_unreachable ();
18634
18635 case REG:
18636 /* We have to use vldm / vstm for too-large modes. */
18637 if (nregs > 1)
18638 {
18639 if (nregs > 4)
18640 templ = "v%smia%%?\t%%m0, %%h1";
18641 else
18642 templ = "v%s1.64\t%%h1, %%A0";
18643
18644 ops[0] = mem;
18645 ops[1] = reg;
18646 break;
18647 }
18648 /* Fall through. */
18649 case LABEL_REF:
18650 case PLUS:
18651 {
18652 int i;
18653 int overlap = -1;
18654 for (i = 0; i < nregs; i++)
18655 {
18656 /* We're only using DImode here because it's a convenient size. */
18657 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18658 ops[1] = adjust_address (mem, DImode, 8 * i);
18659 if (reg_overlap_mentioned_p (ops[0], mem))
18660 {
18661 gcc_assert (overlap == -1);
18662 overlap = i;
18663 }
18664 else
18665 {
18666 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18667 output_asm_insn (buff, ops);
18668 }
18669 }
18670 if (overlap != -1)
18671 {
18672 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18673 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18674 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18675 output_asm_insn (buff, ops);
18676 }
18677
18678 return "";
18679 }
18680
18681 default:
18682 gcc_unreachable ();
18683 }
18684
18685 sprintf (buff, templ, load ? "ld" : "st");
18686 output_asm_insn (buff, ops);
18687
18688 return "";
18689 }
18690
18691 /* Compute and return the length of neon_mov<mode>, where <mode> is
18692 one of VSTRUCT modes: EI, OI, CI or XI. */
18693 int
18694 arm_attr_length_move_neon (rtx_insn *insn)
18695 {
18696 rtx reg, mem, addr;
18697 int load;
18698 machine_mode mode;
18699
18700 extract_insn_cached (insn);
18701
18702 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18703 {
18704 mode = GET_MODE (recog_data.operand[0]);
18705 switch (mode)
18706 {
18707 case E_EImode:
18708 case E_OImode:
18709 return 8;
18710 case E_CImode:
18711 return 12;
18712 case E_XImode:
18713 return 16;
18714 default:
18715 gcc_unreachable ();
18716 }
18717 }
18718
18719 load = REG_P (recog_data.operand[0]);
18720 reg = recog_data.operand[!load];
18721 mem = recog_data.operand[load];
18722
18723 gcc_assert (MEM_P (mem));
18724
18725 mode = GET_MODE (reg);
18726 addr = XEXP (mem, 0);
18727
18728 /* Strip off const from addresses like (const (plus (...))). */
18729 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18730 addr = XEXP (addr, 0);
18731
18732 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18733 {
18734 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18735 return insns * 4;
18736 }
18737 else
18738 return 4;
18739 }
18740
18741 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18742 return zero. */
18743
18744 int
18745 arm_address_offset_is_imm (rtx_insn *insn)
18746 {
18747 rtx mem, addr;
18748
18749 extract_insn_cached (insn);
18750
18751 if (REG_P (recog_data.operand[0]))
18752 return 0;
18753
18754 mem = recog_data.operand[0];
18755
18756 gcc_assert (MEM_P (mem));
18757
18758 addr = XEXP (mem, 0);
18759
18760 if (REG_P (addr)
18761 || (GET_CODE (addr) == PLUS
18762 && REG_P (XEXP (addr, 0))
18763 && CONST_INT_P (XEXP (addr, 1))))
18764 return 1;
18765 else
18766 return 0;
18767 }
18768
18769 /* Output an ADD r, s, #n where n may be too big for one instruction.
18770 If adding zero to one register, output nothing. */
18771 const char *
18772 output_add_immediate (rtx *operands)
18773 {
18774 HOST_WIDE_INT n = INTVAL (operands[2]);
18775
18776 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18777 {
18778 if (n < 0)
18779 output_multi_immediate (operands,
18780 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18781 -n);
18782 else
18783 output_multi_immediate (operands,
18784 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18785 n);
18786 }
18787
18788 return "";
18789 }
18790
18791 /* Output a multiple immediate operation.
18792 OPERANDS is the vector of operands referred to in the output patterns.
18793 INSTR1 is the output pattern to use for the first constant.
18794 INSTR2 is the output pattern to use for subsequent constants.
18795 IMMED_OP is the index of the constant slot in OPERANDS.
18796 N is the constant value. */
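/* For example (illustration only): called from output_add_immediate with
   N = 0xf00f, the loop below picks out the chunks 0x000f and 0xf000 and
   emits

	add	r0, r1, #15
	add	r0, r0, #61440

   each chunk being an 8-bit value at an even bit position, i.e. a valid
   ARM immediate.  */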
18797 static const char *
18798 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18799 int immed_op, HOST_WIDE_INT n)
18800 {
18801 #if HOST_BITS_PER_WIDE_INT > 32
18802 n &= 0xffffffff;
18803 #endif
18804
18805 if (n == 0)
18806 {
18807 /* Quick and easy output. */
18808 operands[immed_op] = const0_rtx;
18809 output_asm_insn (instr1, operands);
18810 }
18811 else
18812 {
18813 int i;
18814 const char * instr = instr1;
18815
18816 /* Note that n is never zero here (which would give no output). */
18817 for (i = 0; i < 32; i += 2)
18818 {
18819 if (n & (3 << i))
18820 {
18821 operands[immed_op] = GEN_INT (n & (255 << i));
18822 output_asm_insn (instr, operands);
18823 instr = instr2;
18824 i += 6;
18825 }
18826 }
18827 }
18828
18829 return "";
18830 }
18831
18832 /* Return the name of a shifter operation. */
18833 static const char *
18834 arm_shift_nmem(enum rtx_code code)
18835 {
18836 switch (code)
18837 {
18838 case ASHIFT:
18839 return ARM_LSL_NAME;
18840
18841 case ASHIFTRT:
18842 return "asr";
18843
18844 case LSHIFTRT:
18845 return "lsr";
18846
18847 case ROTATERT:
18848 return "ror";
18849
18850 default:
18851 abort();
18852 }
18853 }
18854
18855 /* Return the appropriate ARM instruction for the operation code.
18856 The returned result should not be overwritten. OP is the rtx of the
18857 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18858 was shifted. */
18859 const char *
18860 arithmetic_instr (rtx op, int shift_first_arg)
18861 {
18862 switch (GET_CODE (op))
18863 {
18864 case PLUS:
18865 return "add";
18866
18867 case MINUS:
18868 return shift_first_arg ? "rsb" : "sub";
18869
18870 case IOR:
18871 return "orr";
18872
18873 case XOR:
18874 return "eor";
18875
18876 case AND:
18877 return "and";
18878
18879 case ASHIFT:
18880 case ASHIFTRT:
18881 case LSHIFTRT:
18882 case ROTATERT:
18883 return arm_shift_nmem(GET_CODE(op));
18884
18885 default:
18886 gcc_unreachable ();
18887 }
18888 }
18889
18890 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18891 for the operation code. The returned result should not be overwritten.
18892 OP is the rtx code of the shift.
18893 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18894 constant shift amount otherwise. */
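/* For example (illustration only): (mult x 8) yields "lsl" with *AMOUNTP
   set to 3, and (rotate x 10) is canonicalized to "ror" with *AMOUNTP set
   to 22, since a left rotate by N is a right rotate by 32 - N.  */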
18895 static const char *
18896 shift_op (rtx op, HOST_WIDE_INT *amountp)
18897 {
18898 const char * mnem;
18899 enum rtx_code code = GET_CODE (op);
18900
18901 switch (code)
18902 {
18903 case ROTATE:
18904 if (!CONST_INT_P (XEXP (op, 1)))
18905 {
18906 output_operand_lossage ("invalid shift operand");
18907 return NULL;
18908 }
18909
18910 code = ROTATERT;
18911 *amountp = 32 - INTVAL (XEXP (op, 1));
18912 mnem = "ror";
18913 break;
18914
18915 case ASHIFT:
18916 case ASHIFTRT:
18917 case LSHIFTRT:
18918 case ROTATERT:
18919 mnem = arm_shift_nmem(code);
18920 if (CONST_INT_P (XEXP (op, 1)))
18921 {
18922 *amountp = INTVAL (XEXP (op, 1));
18923 }
18924 else if (REG_P (XEXP (op, 1)))
18925 {
18926 *amountp = -1;
18927 return mnem;
18928 }
18929 else
18930 {
18931 output_operand_lossage ("invalid shift operand");
18932 return NULL;
18933 }
18934 break;
18935
18936 case MULT:
18937 /* We never have to worry about the amount being other than a
18938 power of 2, since this case can never be reloaded from a reg. */
18939 if (!CONST_INT_P (XEXP (op, 1)))
18940 {
18941 output_operand_lossage ("invalid shift operand");
18942 return NULL;
18943 }
18944
18945 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18946
18947 /* Amount must be a power of two. */
18948 if (*amountp & (*amountp - 1))
18949 {
18950 output_operand_lossage ("invalid shift operand");
18951 return NULL;
18952 }
18953
18954 *amountp = exact_log2 (*amountp);
18955 gcc_assert (IN_RANGE (*amountp, 0, 31));
18956 return ARM_LSL_NAME;
18957
18958 default:
18959 output_operand_lossage ("invalid shift operand");
18960 return NULL;
18961 }
18962
18963 /* This is not 100% correct, but follows from the desire to merge
18964 multiplication by a power of 2 with the recognizer for a
18965 shift. >=32 is not a valid shift for "lsl", so we must try and
18966 output a shift that produces the correct arithmetical result.
18967 Using lsr #32 is identical except for the fact that the carry bit
18968 is not set correctly if we set the flags; but we never use the
18969 carry bit from such an operation, so we can ignore that. */
18970 if (code == ROTATERT)
18971 /* Rotate is just modulo 32. */
18972 *amountp &= 31;
18973 else if (*amountp != (*amountp & 31))
18974 {
18975 if (code == ASHIFT)
18976 mnem = "lsr";
18977 *amountp = 32;
18978 }
18979
18980 /* Shifts of 0 are no-ops. */
18981 if (*amountp == 0)
18982 return NULL;
18983
18984 return mnem;
18985 }
18986
18987 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18988 because /bin/as is horribly restrictive. The judgement about
18989 whether or not each character is 'printable' (and can be output as
18990 is) or not (and must be printed with an octal escape) must be made
18991 with reference to the *host* character set -- the situation is
18992 similar to that discussed in the comments above pp_c_char in
18993 c-pretty-print.c. */
18994
18995 #define MAX_ASCII_LEN 51
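/* For example (illustration only): the bytes 'O', 'K', '"', '\n' are
   emitted as

	.ascii	"OK\"\012"

   and a fresh .ascii directive is started whenever MAX_ASCII_LEN output
   characters have been produced.  */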
18996
18997 void
18998 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18999 {
19000 int i;
19001 int len_so_far = 0;
19002
19003 fputs ("\t.ascii\t\"", stream);
19004
19005 for (i = 0; i < len; i++)
19006 {
19007 int c = p[i];
19008
19009 if (len_so_far >= MAX_ASCII_LEN)
19010 {
19011 fputs ("\"\n\t.ascii\t\"", stream);
19012 len_so_far = 0;
19013 }
19014
19015 if (ISPRINT (c))
19016 {
19017 if (c == '\\' || c == '\"')
19018 {
19019 putc ('\\', stream);
19020 len_so_far++;
19021 }
19022 putc (c, stream);
19023 len_so_far++;
19024 }
19025 else
19026 {
19027 fprintf (stream, "\\%03o", c);
19028 len_so_far += 4;
19029 }
19030 }
19031
19032 fputs ("\"\n", stream);
19033 }
19034 \f
19035 /* Whether a register is callee saved or not. This is necessary because high
19036 registers are marked as caller saved when optimizing for size on Thumb-1
19037 targets, despite being callee saved, in order to avoid using them. */
19038 #define callee_saved_reg_p(reg) \
19039 (!call_used_regs[reg] \
19040 || (TARGET_THUMB1 && optimize_size \
19041 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19042
19043 /* Compute the register save mask for registers 0 through 12
19044 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19045
19046 static unsigned long
19047 arm_compute_save_reg0_reg12_mask (void)
19048 {
19049 unsigned long func_type = arm_current_func_type ();
19050 unsigned long save_reg_mask = 0;
19051 unsigned int reg;
19052
19053 if (IS_INTERRUPT (func_type))
19054 {
19055 unsigned int max_reg;
19056 /* Interrupt functions must not corrupt any registers,
19057 even call clobbered ones. If this is a leaf function
19058 we can just examine the registers used by the RTL, but
19059 otherwise we have to assume that whatever function is
19060 called might clobber anything, and so we have to save
19061 all the call-clobbered registers as well. */
19062 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19063 /* FIQ handlers have registers r8 - r12 banked, so
19064 we only need to check r0 - r7. Normal ISRs only
19065 bank r14 and r15, so we must check up to r12.
19066 r13 is the stack pointer which is always preserved,
19067 so we do not need to consider it here. */
19068 max_reg = 7;
19069 else
19070 max_reg = 12;
19071
19072 for (reg = 0; reg <= max_reg; reg++)
19073 if (df_regs_ever_live_p (reg)
19074 || (! crtl->is_leaf && call_used_regs[reg]))
19075 save_reg_mask |= (1 << reg);
19076
19077 /* Also save the pic base register if necessary. */
19078 if (flag_pic
19079 && !TARGET_SINGLE_PIC_BASE
19080 && arm_pic_register != INVALID_REGNUM
19081 && crtl->uses_pic_offset_table)
19082 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19083 }
19084 else if (IS_VOLATILE(func_type))
19085 {
19086 /* For noreturn functions we historically omitted register saves
19087 altogether. However this really messes up debugging. As a
19088 compromise save just the frame pointers. Combined with the link
19089 register saved elsewhere this should be sufficient to get
19090 a backtrace. */
19091 if (frame_pointer_needed)
19092 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19093 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19094 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19095 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19096 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19097 }
19098 else
19099 {
19100 /* In the normal case we only need to save those registers
19101 which are call saved and which are used by this function. */
19102 for (reg = 0; reg <= 11; reg++)
19103 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19104 save_reg_mask |= (1 << reg);
19105
19106 /* Handle the frame pointer as a special case. */
19107 if (frame_pointer_needed)
19108 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19109
19110 /* If we aren't loading the PIC register,
19111 don't stack it even though it may be live. */
19112 if (flag_pic
19113 && !TARGET_SINGLE_PIC_BASE
19114 && arm_pic_register != INVALID_REGNUM
19115 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19116 || crtl->uses_pic_offset_table))
19117 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19118
19119 /* The prologue will copy SP into R0, so save it. */
19120 if (IS_STACKALIGN (func_type))
19121 save_reg_mask |= 1;
19122 }
19123
19124 /* Save registers so the exception handler can modify them. */
19125 if (crtl->calls_eh_return)
19126 {
19127 unsigned int i;
19128
19129 for (i = 0; ; i++)
19130 {
19131 reg = EH_RETURN_DATA_REGNO (i);
19132 if (reg == INVALID_REGNUM)
19133 break;
19134 save_reg_mask |= 1 << reg;
19135 }
19136 }
19137
19138 return save_reg_mask;
19139 }
19140
19141 /* Return true if r3 is live at the start of the function. */
19142
19143 static bool
19144 arm_r3_live_at_start_p (void)
19145 {
19146 /* Just look at cfg info, which is still close enough to correct at this
19147 point. This gives false positives for broken functions that might use
19148 uninitialized data that happens to be allocated in r3, but who cares? */
19149 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19150 }
19151
19152 /* Compute the number of bytes used to store the static chain register on the
19153 stack, above the stack frame. We need to know this accurately to get the
19154 alignment of the rest of the stack frame correct. */
19155
19156 static int
19157 arm_compute_static_chain_stack_bytes (void)
19158 {
19159 /* See the defining assertion in arm_expand_prologue. */
19160 if (IS_NESTED (arm_current_func_type ())
19161 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19162 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19163 && !df_regs_ever_live_p (LR_REGNUM)))
19164 && arm_r3_live_at_start_p ()
19165 && crtl->args.pretend_args_size == 0)
19166 return 4;
19167
19168 return 0;
19169 }
19170
19171 /* Compute a bit mask of which core registers need to be
19172 saved on the stack for the current function.
19173 This is used by arm_compute_frame_layout, which may add extra registers. */
19174
19175 static unsigned long
19176 arm_compute_save_core_reg_mask (void)
19177 {
19178 unsigned int save_reg_mask = 0;
19179 unsigned long func_type = arm_current_func_type ();
19180 unsigned int reg;
19181
19182 if (IS_NAKED (func_type))
19183 /* This should never really happen. */
19184 return 0;
19185
19186 /* If we are creating a stack frame, then we must save the frame pointer,
19187 IP (which will hold the old stack pointer), LR and the PC. */
19188 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19189 save_reg_mask |=
19190 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19191 | (1 << IP_REGNUM)
19192 | (1 << LR_REGNUM)
19193 | (1 << PC_REGNUM);
19194
19195 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19196
19197 /* Decide if we need to save the link register.
19198 Interrupt routines have their own banked link register,
19199 so they never need to save it.
19200 Otherwise if we do not use the link register we do not need to save
19201 it. If we are pushing other registers onto the stack however, we
19202 can save an instruction in the epilogue by pushing the link register
19203 now and then popping it back into the PC. This incurs extra memory
19204 accesses though, so we only do it when optimizing for size, and only
19205 if we know that we will not need a fancy return sequence. */
19206 if (df_regs_ever_live_p (LR_REGNUM)
19207 || (save_reg_mask
19208 && optimize_size
19209 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19210 && !crtl->tail_call_emit
19211 && !crtl->calls_eh_return))
19212 save_reg_mask |= 1 << LR_REGNUM;
19213
19214 if (cfun->machine->lr_save_eliminated)
19215 save_reg_mask &= ~ (1 << LR_REGNUM);
19216
19217 if (TARGET_REALLY_IWMMXT
19218 && ((bit_count (save_reg_mask)
19219 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19220 arm_compute_static_chain_stack_bytes())
19221 ) % 2) != 0)
19222 {
19223 /* The total number of registers that are going to be pushed
19224 onto the stack is odd. We need to ensure that the stack
19225 is 64-bit aligned before we start to save iWMMXt registers,
19226 and also before we start to create locals. (A local variable
19227 might be a double or long long which we will load/store using
19228 an iWMMXt instruction). Therefore we need to push another
19229 ARM register, so that the stack will be 64-bit aligned. We
19230 try to avoid using the arg registers (r0 -r3) as they might be
19231 used to pass values in a tail call. */
19232 for (reg = 4; reg <= 12; reg++)
19233 if ((save_reg_mask & (1 << reg)) == 0)
19234 break;
19235
19236 if (reg <= 12)
19237 save_reg_mask |= (1 << reg);
19238 else
19239 {
19240 cfun->machine->sibcall_blocked = 1;
19241 save_reg_mask |= (1 << 3);
19242 }
19243 }
19244
19245 /* We may need to push an additional register for use initializing the
19246 PIC base register. */
19247 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19248 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19249 {
19250 reg = thumb_find_work_register (1 << 4);
19251 if (!call_used_regs[reg])
19252 save_reg_mask |= (1 << reg);
19253 }
19254
19255 return save_reg_mask;
19256 }
19257
19258 /* Compute a bit mask of which core registers need to be
19259 saved on the stack for the current function. */
19260 static unsigned long
19261 thumb1_compute_save_core_reg_mask (void)
19262 {
19263 unsigned long mask;
19264 unsigned reg;
19265
19266 mask = 0;
19267 for (reg = 0; reg < 12; reg ++)
19268 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19269 mask |= 1 << reg;
19270
19271 /* Handle the frame pointer as a special case. */
19272 if (frame_pointer_needed)
19273 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19274
19275 if (flag_pic
19276 && !TARGET_SINGLE_PIC_BASE
19277 && arm_pic_register != INVALID_REGNUM
19278 && crtl->uses_pic_offset_table)
19279 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19280
19281 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19282 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19283 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19284
19285 /* LR will also be pushed if any lo regs are pushed. */
19286 if (mask & 0xff || thumb_force_lr_save ())
19287 mask |= (1 << LR_REGNUM);
19288
19289 /* Make sure we have a low work register if we need one.
19290 We will need one if we are going to push a high register,
19291 but we are not currently intending to push a low register. */
19292 if ((mask & 0xff) == 0
19293 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19294 {
19295 /* Use thumb_find_work_register to choose which register
19296 we will use. If the register is live then we will
19297 have to push it. Use LAST_LO_REGNUM as our fallback
19298 choice for the register to select. */
19299 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19300 /* Make sure the register returned by thumb_find_work_register is
19301 not part of the return value. */
19302 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19303 reg = LAST_LO_REGNUM;
19304
19305 if (callee_saved_reg_p (reg))
19306 mask |= 1 << reg;
19307 }
19308
19309 /* The 504 below is 8 bytes less than 512 because there are two possible
19310 alignment words. We can't tell here if they will be present or not so we
19311 have to play it safe and assume that they are. */
19312 if ((CALLER_INTERWORKING_SLOT_SIZE +
19313 ROUND_UP_WORD (get_frame_size ()) +
19314 crtl->outgoing_args_size) >= 504)
19315 {
19316 /* This is the same as the code in thumb1_expand_prologue() which
19317 determines which register to use for stack decrement. */
19318 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19319 if (mask & (1 << reg))
19320 break;
19321
19322 if (reg > LAST_LO_REGNUM)
19323 {
19324 /* Make sure we have a register available for stack decrement. */
19325 mask |= 1 << LAST_LO_REGNUM;
19326 }
19327 }
19328
19329 return mask;
19330 }
19331
19332
19333 /* Return the number of bytes required to save VFP registers. */
19334 static int
19335 arm_get_vfp_saved_size (void)
19336 {
19337 unsigned int regno;
19338 int count;
19339 int saved;
19340
19341 saved = 0;
19342 /* Space for saved VFP registers. */
19343 if (TARGET_HARD_FLOAT)
19344 {
19345 count = 0;
19346 for (regno = FIRST_VFP_REGNUM;
19347 regno < LAST_VFP_REGNUM;
19348 regno += 2)
19349 {
19350 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19351 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19352 {
19353 if (count > 0)
19354 {
19355 /* Work around the ARM10 VFPr1 bug. */
19356 if (count == 2 && !arm_arch6)
19357 count++;
19358 saved += count * 8;
19359 }
19360 count = 0;
19361 }
19362 else
19363 count++;
19364 }
19365 if (count > 0)
19366 {
19367 if (count == 2 && !arm_arch6)
19368 count++;
19369 saved += count * 8;
19370 }
19371 }
19372 return saved;
19373 }
19374
19375
19376 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19377 everything bar the final return instruction. If simple_return is true,
19378 then do not output epilogue, because it has already been emitted in RTL. */
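/* For example (illustration only): a function that saved {r4, r5, lr} and
   has no special exit requirements returns with a single
   "pop {r4, r5, pc}", whereas a leaf function with nothing saved returns
   with "bx lr" (or "mov pc, lr" on targets without BX).  */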
19379 const char *
19380 output_return_instruction (rtx operand, bool really_return, bool reverse,
19381 bool simple_return)
19382 {
19383 char conditional[10];
19384 char instr[100];
19385 unsigned reg;
19386 unsigned long live_regs_mask;
19387 unsigned long func_type;
19388 arm_stack_offsets *offsets;
19389
19390 func_type = arm_current_func_type ();
19391
19392 if (IS_NAKED (func_type))
19393 return "";
19394
19395 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19396 {
19397 /* If this function was declared non-returning, and we have
19398 found a tail call, then we have to trust that the called
19399 function won't return. */
19400 if (really_return)
19401 {
19402 rtx ops[2];
19403
19404 /* Otherwise, trap an attempted return by aborting. */
19405 ops[0] = operand;
19406 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19407 : "abort");
19408 assemble_external_libcall (ops[1]);
19409 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19410 }
19411
19412 return "";
19413 }
19414
19415 gcc_assert (!cfun->calls_alloca || really_return);
19416
19417 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19418
19419 cfun->machine->return_used_this_function = 1;
19420
19421 offsets = arm_get_frame_offsets ();
19422 live_regs_mask = offsets->saved_regs_mask;
19423
19424 if (!simple_return && live_regs_mask)
19425 {
19426 const char * return_reg;
19427
19428 /* If we do not have any special requirements for function exit
19429 (e.g. interworking) then we can load the return address
19430 directly into the PC. Otherwise we must load it into LR. */
19431 if (really_return
19432 && !IS_CMSE_ENTRY (func_type)
19433 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19434 return_reg = reg_names[PC_REGNUM];
19435 else
19436 return_reg = reg_names[LR_REGNUM];
19437
19438 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19439 {
19440 /* There are three possible reasons for the IP register
19441 being saved: 1) a stack frame was created, in which case
19442 IP contains the old stack pointer, or 2) an ISR routine
19443 corrupted it, or 3) it was saved to align the stack on
19444 iWMMXt. In case 1, restore IP into SP, otherwise just
19445 restore IP. */
19446 if (frame_pointer_needed)
19447 {
19448 live_regs_mask &= ~ (1 << IP_REGNUM);
19449 live_regs_mask |= (1 << SP_REGNUM);
19450 }
19451 else
19452 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19453 }
19454
19455 /* On some ARM architectures it is faster to use LDR rather than
19456 LDM to load a single register. On other architectures, the
19457 cost is the same. In 26 bit mode, or for exception handlers,
19458 we have to use LDM to load the PC so that the CPSR is also
19459 restored. */
19460 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19461 if (live_regs_mask == (1U << reg))
19462 break;
19463
19464 if (reg <= LAST_ARM_REGNUM
19465 && (reg != LR_REGNUM
19466 || ! really_return
19467 || ! IS_INTERRUPT (func_type)))
19468 {
19469 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19470 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19471 }
19472 else
19473 {
19474 char *p;
19475 int first = 1;
19476
19477 /* Generate the load multiple instruction to restore the
19478 registers. Note we can get here, even if
19479 frame_pointer_needed is true, but only if sp already
19480 points to the base of the saved core registers. */
19481 if (live_regs_mask & (1 << SP_REGNUM))
19482 {
19483 unsigned HOST_WIDE_INT stack_adjust;
19484
19485 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19486 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19487
19488 if (stack_adjust && arm_arch5 && TARGET_ARM)
19489 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19490 else
19491 {
19492 /* If we can't use ldmib (SA110 bug),
19493 then try to pop r3 instead. */
19494 if (stack_adjust)
19495 live_regs_mask |= 1 << 3;
19496
19497 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19498 }
19499 }
19500 /* For interrupt returns we have to use an LDM rather than
19501 a POP so that we can use the exception return variant. */
19502 else if (IS_INTERRUPT (func_type))
19503 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19504 else
19505 sprintf (instr, "pop%s\t{", conditional);
19506
19507 p = instr + strlen (instr);
19508
19509 for (reg = 0; reg <= SP_REGNUM; reg++)
19510 if (live_regs_mask & (1 << reg))
19511 {
19512 int l = strlen (reg_names[reg]);
19513
19514 if (first)
19515 first = 0;
19516 else
19517 {
19518 memcpy (p, ", ", 2);
19519 p += 2;
19520 }
19521
19522 memcpy (p, "%|", 2);
19523 memcpy (p + 2, reg_names[reg], l);
19524 p += l + 2;
19525 }
19526
19527 if (live_regs_mask & (1 << LR_REGNUM))
19528 {
19529 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19530 /* If returning from an interrupt, restore the CPSR. */
19531 if (IS_INTERRUPT (func_type))
19532 strcat (p, "^");
19533 }
19534 else
19535 strcpy (p, "}");
19536 }
19537
19538 output_asm_insn (instr, & operand);
19539
19540 /* See if we need to generate an extra instruction to
19541 perform the actual function return. */
19542 if (really_return
19543 && func_type != ARM_FT_INTERWORKED
19544 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19545 {
19546 /* The return has already been handled
19547 by loading the LR into the PC. */
19548 return "";
19549 }
19550 }
19551
19552 if (really_return)
19553 {
19554 switch ((int) ARM_FUNC_TYPE (func_type))
19555 {
19556 case ARM_FT_ISR:
19557 case ARM_FT_FIQ:
19558 /* ??? This is wrong for unified assembly syntax. */
19559 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19560 break;
19561
19562 case ARM_FT_INTERWORKED:
19563 gcc_assert (arm_arch5 || arm_arch4t);
19564 sprintf (instr, "bx%s\t%%|lr", conditional);
19565 break;
19566
19567 case ARM_FT_EXCEPTION:
19568 /* ??? This is wrong for unified assembly syntax. */
19569 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19570 break;
19571
19572 default:
19573 if (IS_CMSE_ENTRY (func_type))
19574 {
19575 /* Check if we have to clear the 'GE bits', which are only used if
19576 parallel add and subtraction instructions are available. */
19577 if (TARGET_INT_SIMD)
19578 snprintf (instr, sizeof (instr),
19579 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19580 else
19581 snprintf (instr, sizeof (instr),
19582 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19583
19584 output_asm_insn (instr, & operand);
19585 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19586 {
19587 /* Clear the cumulative exception-status bits (0-4,7) and the
19588 condition code bits (28-31) of the FPSCR. We need to
19589 remember to clear the first scratch register used (IP) and
19590 save and restore the second (r4). */
19591 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19592 output_asm_insn (instr, & operand);
19593 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19594 output_asm_insn (instr, & operand);
19595 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19596 output_asm_insn (instr, & operand);
19597 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19598 output_asm_insn (instr, & operand);
19599 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19600 output_asm_insn (instr, & operand);
19601 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19602 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19604 output_asm_insn (instr, & operand);
19605 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19606 output_asm_insn (instr, & operand);
19607 }
19608 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19609 }
19610 /* Use bx if it's available. */
19611 else if (arm_arch5 || arm_arch4t)
19612 sprintf (instr, "bx%s\t%%|lr", conditional);
19613 else
19614 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19615 break;
19616 }
19617
19618 output_asm_insn (instr, & operand);
19619 }
19620
19621 return "";
19622 }
19623
19624 /* Output in FILE asm statements needed to declare the NAME of the function
19625 defined by its DECL node. */
19626
19627 void
19628 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19629 {
19630 size_t cmse_name_len;
19631 char *cmse_name = 0;
19632 char cmse_prefix[] = "__acle_se_";
19633
19634 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19635 extra function label for each function with the 'cmse_nonsecure_entry'
19636 attribute. This extra function label should be prepended with
19637 '__acle_se_', telling the linker that it needs to create secure gateway
19638 veneers for this function. */
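/* For example (illustration only): for a cmse_nonsecure_entry function
   "foo", an extra global, function-typed label "__acle_se_foo" is
   emitted alongside "foo" at the same address, so that the linker can
   create the secure gateway veneer for it.  */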
19639 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19640 DECL_ATTRIBUTES (decl)))
19641 {
19642 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19643 cmse_name = XALLOCAVEC (char, cmse_name_len);
19644 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19645 targetm.asm_out.globalize_label (file, cmse_name);
19646
19647 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19648 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19649 }
19650
19651 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19652 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19653 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19654 ASM_OUTPUT_LABEL (file, name);
19655
19656 if (cmse_name)
19657 ASM_OUTPUT_LABEL (file, cmse_name);
19658
19659 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19660 }
19661
19662 /* Write the function name into the code section, directly preceding
19663 the function prologue.
19664
19665 Code will be output similar to this:
19666 t0
19667 .ascii "arm_poke_function_name", 0
19668 .align
19669 t1
19670 .word 0xff000000 + (t1 - t0)
19671 arm_poke_function_name
19672 mov ip, sp
19673 stmfd sp!, {fp, ip, lr, pc}
19674 sub fp, ip, #4
19675
19676 When performing a stack backtrace, code can inspect the value
19677 of 'pc' stored at 'fp' + 0. If the trace function then looks
19678 at location pc - 12 and the top 8 bits are set, then we know
19679 that there is a function name embedded immediately preceding this
19680 location and has length ((pc[-3]) & 0xff000000).
19681
19682 We assume that pc is declared as a pointer to an unsigned long.
19683
19684 It is of no benefit to output the function name if we are assembling
19685 a leaf function. These function types will not contain a stack
19686 backtrace structure, therefore it is not possible to determine the
19687 function name. */
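/* A sketch (illustration only, not part of GCC) of how a backtrace
   routine could recover the embedded name, assuming PC holds the saved
   'pc' value described above:

     unsigned long marker = ((unsigned long *) pc)[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & 0x00ffffff;
	 const char *name = (const char *) pc - 12 - len;
       }
*/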
19688 void
19689 arm_poke_function_name (FILE *stream, const char *name)
19690 {
19691 unsigned long alignlength;
19692 unsigned long length;
19693 rtx x;
19694
19695 length = strlen (name) + 1;
19696 alignlength = ROUND_UP_WORD (length);
19697
19698 ASM_OUTPUT_ASCII (stream, name, length);
19699 ASM_OUTPUT_ALIGN (stream, 2);
19700 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19701 assemble_aligned_integer (UNITS_PER_WORD, x);
19702 }
19703
19704 /* Place some comments into the assembler stream
19705 describing the current function. */
19706 static void
19707 arm_output_function_prologue (FILE *f)
19708 {
19709 unsigned long func_type;
19710
19711 /* Sanity check. */
19712 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19713
19714 func_type = arm_current_func_type ();
19715
19716 switch ((int) ARM_FUNC_TYPE (func_type))
19717 {
19718 default:
19719 case ARM_FT_NORMAL:
19720 break;
19721 case ARM_FT_INTERWORKED:
19722 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19723 break;
19724 case ARM_FT_ISR:
19725 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19726 break;
19727 case ARM_FT_FIQ:
19728 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19729 break;
19730 case ARM_FT_EXCEPTION:
19731 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19732 break;
19733 }
19734
19735 if (IS_NAKED (func_type))
19736 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19737
19738 if (IS_VOLATILE (func_type))
19739 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19740
19741 if (IS_NESTED (func_type))
19742 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19743 if (IS_STACKALIGN (func_type))
19744 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19745 if (IS_CMSE_ENTRY (func_type))
19746 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19747
19748 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19749 crtl->args.size,
19750 crtl->args.pretend_args_size,
19751 (HOST_WIDE_INT) get_frame_size ());
19752
19753 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19754 frame_pointer_needed,
19755 cfun->machine->uses_anonymous_args);
19756
19757 if (cfun->machine->lr_save_eliminated)
19758 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19759
19760 if (crtl->calls_eh_return)
19761 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19762
19763 }
19764
19765 static void
19766 arm_output_function_epilogue (FILE *)
19767 {
19768 arm_stack_offsets *offsets;
19769
19770 if (TARGET_THUMB1)
19771 {
19772 int regno;
19773
19774 /* Emit any call-via-reg trampolines that are needed for v4t support
19775 of call_reg and call_value_reg type insns. */
19776 for (regno = 0; regno < LR_REGNUM; regno++)
19777 {
19778 rtx label = cfun->machine->call_via[regno];
19779
19780 if (label != NULL)
19781 {
19782 switch_to_section (function_section (current_function_decl));
19783 targetm.asm_out.internal_label (asm_out_file, "L",
19784 CODE_LABEL_NUMBER (label));
19785 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19786 }
19787 }
19788
19789 /* ??? Probably not safe to set this here, since it assumes that a
19790 function will be emitted as assembly immediately after we generate
19791 RTL for it. This does not happen for inline functions. */
19792 cfun->machine->return_used_this_function = 0;
19793 }
19794 else /* TARGET_32BIT */
19795 {
19796 /* We need to take into account any stack-frame rounding. */
19797 offsets = arm_get_frame_offsets ();
19798
19799 gcc_assert (!use_return_insn (FALSE, NULL)
19800 || (cfun->machine->return_used_this_function != 0)
19801 || offsets->saved_regs == offsets->outgoing_args
19802 || frame_pointer_needed);
19803 }
19804 }
19805
19806 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19807 STR and STRD. If an even number of registers is being pushed, an
19808 STRD pattern is created for each register pair. If an
19809 odd number of registers is pushed, emit an initial STR followed by
19810 as many STRD instructions as are needed. This works best when the
19811 stack is initially 64-bit aligned (the normal case), since it
19812 ensures that each STRD is also 64-bit aligned. */
19813 static void
19814 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19815 {
19816 int num_regs = 0;
19817 int i;
19818 int regno;
19819 rtx par = NULL_RTX;
19820 rtx dwarf = NULL_RTX;
19821 rtx tmp;
19822 bool first = true;
19823
19824 num_regs = bit_count (saved_regs_mask);
19825
19826 /* Must be at least one register to save, and can't save SP or PC. */
19827 gcc_assert (num_regs > 0 && num_regs <= 14);
19828 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19829 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19830
19831 /* Create sequence for DWARF info. All the frame-related data for
19832 debugging is held in this wrapper. */
19833 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19834
19835 /* Describe the stack adjustment. */
19836 tmp = gen_rtx_SET (stack_pointer_rtx,
19837 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19838 RTX_FRAME_RELATED_P (tmp) = 1;
19839 XVECEXP (dwarf, 0, 0) = tmp;
19840
19841 /* Find the first register. */
19842 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19843 ;
19844
19845 i = 0;
19846
19847 /* If there's an odd number of registers to push, start off by
19848 pushing a single register. This ensures that subsequent strd
19849 operations are dword aligned (assuming that SP was originally
19850 64-bit aligned). */
19851 if ((num_regs & 1) != 0)
19852 {
19853 rtx reg, mem, insn;
19854
19855 reg = gen_rtx_REG (SImode, regno);
19856 if (num_regs == 1)
19857 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19858 stack_pointer_rtx));
19859 else
19860 mem = gen_frame_mem (Pmode,
19861 gen_rtx_PRE_MODIFY
19862 (Pmode, stack_pointer_rtx,
19863 plus_constant (Pmode, stack_pointer_rtx,
19864 -4 * num_regs)));
19865
19866 tmp = gen_rtx_SET (mem, reg);
19867 RTX_FRAME_RELATED_P (tmp) = 1;
19868 insn = emit_insn (tmp);
19869 RTX_FRAME_RELATED_P (insn) = 1;
19870 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19871 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19872 RTX_FRAME_RELATED_P (tmp) = 1;
19873 i++;
19874 regno++;
19875 XVECEXP (dwarf, 0, i) = tmp;
19876 first = false;
19877 }
19878
19879 while (i < num_regs)
19880 if (saved_regs_mask & (1 << regno))
19881 {
19882 rtx reg1, reg2, mem1, mem2;
19883 rtx tmp0, tmp1, tmp2;
19884 int regno2;
19885
19886 /* Find the register to pair with this one. */
19887 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19888 regno2++)
19889 ;
19890
19891 reg1 = gen_rtx_REG (SImode, regno);
19892 reg2 = gen_rtx_REG (SImode, regno2);
19893
19894 if (first)
19895 {
19896 rtx insn;
19897
19898 first = false;
19899 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19900 stack_pointer_rtx,
19901 -4 * num_regs));
19902 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19903 stack_pointer_rtx,
19904 -4 * (num_regs - 1)));
19905 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19906 plus_constant (Pmode, stack_pointer_rtx,
19907 -4 * (num_regs)));
19908 tmp1 = gen_rtx_SET (mem1, reg1);
19909 tmp2 = gen_rtx_SET (mem2, reg2);
19910 RTX_FRAME_RELATED_P (tmp0) = 1;
19911 RTX_FRAME_RELATED_P (tmp1) = 1;
19912 RTX_FRAME_RELATED_P (tmp2) = 1;
19913 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19914 XVECEXP (par, 0, 0) = tmp0;
19915 XVECEXP (par, 0, 1) = tmp1;
19916 XVECEXP (par, 0, 2) = tmp2;
19917 insn = emit_insn (par);
19918 RTX_FRAME_RELATED_P (insn) = 1;
19919 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19920 }
19921 else
19922 {
19923 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19924 stack_pointer_rtx,
19925 4 * i));
19926 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19927 stack_pointer_rtx,
19928 4 * (i + 1)));
19929 tmp1 = gen_rtx_SET (mem1, reg1);
19930 tmp2 = gen_rtx_SET (mem2, reg2);
19931 RTX_FRAME_RELATED_P (tmp1) = 1;
19932 RTX_FRAME_RELATED_P (tmp2) = 1;
19933 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19934 XVECEXP (par, 0, 0) = tmp1;
19935 XVECEXP (par, 0, 1) = tmp2;
19936 emit_insn (par);
19937 }
19938
19939 /* Create unwind information. This is an approximation. */
19940 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 4 * i)),
19944 reg1);
19945 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19946 plus_constant (Pmode,
19947 stack_pointer_rtx,
19948 4 * (i + 1))),
19949 reg2);
19950
19951 RTX_FRAME_RELATED_P (tmp1) = 1;
19952 RTX_FRAME_RELATED_P (tmp2) = 1;
19953 XVECEXP (dwarf, 0, i + 1) = tmp1;
19954 XVECEXP (dwarf, 0, i + 2) = tmp2;
19955 i += 2;
19956 regno = regno2 + 1;
19957 }
19958 else
19959 regno++;
19960
19961 return;
19962 }
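/* Rough illustration of the code above (the exact forms depend on the
   strd/str patterns that end up being matched): for a mask containing
   {r4, r5, r6, r7} the emitted sequence resembles

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   while for {r4, r5, r6} (odd count) it resembles

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
*/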
19963
19964 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19965 whenever possible, otherwise it emits single-word stores. The first store
19966 also allocates stack space for all saved registers, using writeback with
19967 post-addressing mode. All other stores use offset addressing. If no STRD
19968 can be emitted, this function emits a sequence of single-word stores,
19969 and not an STM as before, because single-word stores provide more
19970 scheduling freedom and can be turned into an STM by peephole optimizations. */
19971 static void
19972 arm_emit_strd_push (unsigned long saved_regs_mask)
19973 {
19974 int num_regs = 0;
19975 int i, j, dwarf_index = 0;
19976 int offset = 0;
19977 rtx dwarf = NULL_RTX;
19978 rtx insn = NULL_RTX;
19979 rtx tmp, mem;
19980
19981 /* TODO: More efficient code can be emitted by changing the
19982 layout, e.g., first push all pairs that can use STRD to keep the
19983 stack aligned, and then push all other registers. */
19984 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19985 if (saved_regs_mask & (1 << i))
19986 num_regs++;
19987
19988 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19989 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19990 gcc_assert (num_regs > 0);
19991
19992 /* Create sequence for DWARF info. */
19993 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19994
19995 /* For dwarf info, we generate explicit stack update. */
19996 tmp = gen_rtx_SET (stack_pointer_rtx,
19997 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19998 RTX_FRAME_RELATED_P (tmp) = 1;
19999 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20000
20001 /* Save registers. */
20002 offset = -4 * num_regs;
20003 j = 0;
20004 while (j <= LAST_ARM_REGNUM)
20005 if (saved_regs_mask & (1 << j))
20006 {
20007 if ((j % 2 == 0)
20008 && (saved_regs_mask & (1 << (j + 1))))
20009 {
20010 /* The current register and the next register form a register pair
20011 for which STRD can be generated. */
20012 if (offset < 0)
20013 {
20014 /* Allocate stack space for all saved registers. */
20015 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20016 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20017 mem = gen_frame_mem (DImode, tmp);
20018 offset = 0;
20019 }
20020 else if (offset > 0)
20021 mem = gen_frame_mem (DImode,
20022 plus_constant (Pmode,
20023 stack_pointer_rtx,
20024 offset));
20025 else
20026 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20027
20028 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20029 RTX_FRAME_RELATED_P (tmp) = 1;
20030 tmp = emit_insn (tmp);
20031
20032 /* Record the first store insn. */
20033 if (dwarf_index == 1)
20034 insn = tmp;
20035
20036 /* Generate dwarf info. */
20037 mem = gen_frame_mem (SImode,
20038 plus_constant (Pmode,
20039 stack_pointer_rtx,
20040 offset));
20041 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20042 RTX_FRAME_RELATED_P (tmp) = 1;
20043 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20044
20045 mem = gen_frame_mem (SImode,
20046 plus_constant (Pmode,
20047 stack_pointer_rtx,
20048 offset + 4));
20049 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20050 RTX_FRAME_RELATED_P (tmp) = 1;
20051 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20052
20053 offset += 8;
20054 j += 2;
20055 }
20056 else
20057 {
20058 /* Emit a single word store. */
20059 if (offset < 0)
20060 {
20061 /* Allocate stack space for all saved registers. */
20062 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20063 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20064 mem = gen_frame_mem (SImode, tmp);
20065 offset = 0;
20066 }
20067 else if (offset > 0)
20068 mem = gen_frame_mem (SImode,
20069 plus_constant (Pmode,
20070 stack_pointer_rtx,
20071 offset));
20072 else
20073 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20074
20075 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20076 RTX_FRAME_RELATED_P (tmp) = 1;
20077 tmp = emit_insn (tmp);
20078
20079 /* Record the first store insn. */
20080 if (dwarf_index == 1)
20081 insn = tmp;
20082
20083 /* Generate dwarf info. */
20084 mem = gen_frame_mem (SImode,
20085 plus_constant (Pmode,
20086 stack_pointer_rtx,
20087 offset));
20088 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20091
20092 offset += 4;
20093 j += 1;
20094 }
20095 }
20096 else
20097 j++;
20098
20099 /* Attach dwarf info to the first insn we generate. */
20100 gcc_assert (insn != NULL_RTX);
20101 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20102 RTX_FRAME_RELATED_P (insn) = 1;
20103 }
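/* Rough illustration only: for a mask of {r4, r5, r7} the code above
   emits something along the lines of

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   i.e. the first store allocates the whole 12-byte area with writeback
   and the remaining stores use plain offset addressing.  */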
20104
20105 /* Generate and emit an insn that we will recognize as a push_multi.
20106 Unfortunately, since this insn does not reflect very well the actual
20107 semantics of the operation, we need to annotate the insn for the benefit
20108 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20109 MASK for registers that should be annotated for DWARF2 frame unwind
20110 information. */
20111 static rtx
20112 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20113 {
20114 int num_regs = 0;
20115 int num_dwarf_regs = 0;
20116 int i, j;
20117 rtx par;
20118 rtx dwarf;
20119 int dwarf_par_index;
20120 rtx tmp, reg;
20121
20122 /* We don't record the PC in the dwarf frame information. */
20123 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20124
20125 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20126 {
20127 if (mask & (1 << i))
20128 num_regs++;
20129 if (dwarf_regs_mask & (1 << i))
20130 num_dwarf_regs++;
20131 }
20132
20133 gcc_assert (num_regs && num_regs <= 16);
20134 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20135
20136 /* For the body of the insn we are going to generate an UNSPEC in
20137 parallel with several USEs. This allows the insn to be recognized
20138 by the push_multi pattern in the arm.md file.
20139
20140 The body of the insn looks something like this:
20141
20142 (parallel [
20143 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20144 (const_int:SI <num>)))
20145 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20146 (use (reg:SI XX))
20147 (use (reg:SI YY))
20148 ...
20149 ])
20150
20151 For the frame note however, we try to be more explicit and actually
20152 show each register being stored into the stack frame, plus a (single)
20153 decrement of the stack pointer. We do it this way in order to be
20154 friendly to the stack unwinding code, which only wants to see a single
20155 stack decrement per instruction. The RTL we generate for the note looks
20156 something like this:
20157
20158 (sequence [
20159 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20160 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20161 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20162 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20163 ...
20164 ])
20165
20166 FIXME: In an ideal world the PRE_MODIFY would not exist and
20167 instead we'd have a parallel expression detailing all
20168 the stores to the various memory addresses so that debug
20169 information is more up-to-date. Remember however while writing
20170 this to take care of the constraints with the push instruction.
20171
20172 Note also that this has to be taken care of for the VFP registers.
20173
20174 For more see PR43399. */
20175
20176 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20177 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20178 dwarf_par_index = 1;
20179
20180 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20181 {
20182 if (mask & (1 << i))
20183 {
20184 reg = gen_rtx_REG (SImode, i);
20185
20186 XVECEXP (par, 0, 0)
20187 = gen_rtx_SET (gen_frame_mem
20188 (BLKmode,
20189 gen_rtx_PRE_MODIFY (Pmode,
20190 stack_pointer_rtx,
20191 plus_constant
20192 (Pmode, stack_pointer_rtx,
20193 -4 * num_regs))
20194 ),
20195 gen_rtx_UNSPEC (BLKmode,
20196 gen_rtvec (1, reg),
20197 UNSPEC_PUSH_MULT));
20198
20199 if (dwarf_regs_mask & (1 << i))
20200 {
20201 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20202 reg);
20203 RTX_FRAME_RELATED_P (tmp) = 1;
20204 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20205 }
20206
20207 break;
20208 }
20209 }
20210
20211 for (j = 1, i++; j < num_regs; i++)
20212 {
20213 if (mask & (1 << i))
20214 {
20215 reg = gen_rtx_REG (SImode, i);
20216
20217 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20218
20219 if (dwarf_regs_mask & (1 << i))
20220 {
20221 tmp
20222 = gen_rtx_SET (gen_frame_mem
20223 (SImode,
20224 plus_constant (Pmode, stack_pointer_rtx,
20225 4 * j)),
20226 reg);
20227 RTX_FRAME_RELATED_P (tmp) = 1;
20228 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20229 }
20230
20231 j++;
20232 }
20233 }
20234
20235 par = emit_insn (par);
20236
20237 tmp = gen_rtx_SET (stack_pointer_rtx,
20238 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20239 RTX_FRAME_RELATED_P (tmp) = 1;
20240 XVECEXP (dwarf, 0, 0) = tmp;
20241
20242 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20243
20244 return par;
20245 }
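/* The insn built above is ultimately matched by the push_multi pattern,
   so for a mask of, say, {r4, r5, lr} the output is normally a single
   "push {r4, r5, lr}" (or "stmfd sp!, {r4, r5, lr}"), while the attached
   note describes the equivalent sequence of individual stores for the
   unwinder.  This is only a summary; see the pattern in arm.md for the
   exact output.  */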
20246
20247 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20248 SIZE is the offset to be adjusted.
20249 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20250 static void
20251 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20252 {
20253 rtx dwarf;
20254
20255 RTX_FRAME_RELATED_P (insn) = 1;
20256 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20257 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20258 }
20259
20260 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20261 SAVED_REGS_MASK shows which registers need to be restored.
20262
20263 Unfortunately, since this insn does not reflect very well the actual
20264 semantics of the operation, we need to annotate the insn for the benefit
20265 of DWARF2 frame unwind information. */
20266 static void
20267 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20268 {
20269 int num_regs = 0;
20270 int i, j;
20271 rtx par;
20272 rtx dwarf = NULL_RTX;
20273 rtx tmp, reg;
20274 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20275 int offset_adj;
20276 int emit_update;
20277
20278 offset_adj = return_in_pc ? 1 : 0;
20279 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20280 if (saved_regs_mask & (1 << i))
20281 num_regs++;
20282
20283 gcc_assert (num_regs && num_regs <= 16);
20284
20285 /* If SP is in the reglist, then we don't emit the SP update insn. */
20286 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20287
20288 /* The parallel needs to hold num_regs SETs
20289 and one SET for the stack update. */
20290 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20291
20292 if (return_in_pc)
20293 XVECEXP (par, 0, 0) = ret_rtx;
20294
20295 if (emit_update)
20296 {
20297 /* Increment the stack pointer, based on there being
20298 num_regs 4-byte registers to restore. */
20299 tmp = gen_rtx_SET (stack_pointer_rtx,
20300 plus_constant (Pmode,
20301 stack_pointer_rtx,
20302 4 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (par, 0, offset_adj) = tmp;
20305 }
20306
20307 /* Now restore every reg, which may include PC. */
20308 for (j = 0, i = 0; j < num_regs; i++)
20309 if (saved_regs_mask & (1 << i))
20310 {
20311 reg = gen_rtx_REG (SImode, i);
20312 if ((num_regs == 1) && emit_update && !return_in_pc)
20313 {
20314 /* Emit single load with writeback. */
20315 tmp = gen_frame_mem (SImode,
20316 gen_rtx_POST_INC (Pmode,
20317 stack_pointer_rtx));
20318 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20319 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20320 return;
20321 }
20322
20323 tmp = gen_rtx_SET (reg,
20324 gen_frame_mem
20325 (SImode,
20326 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20327 RTX_FRAME_RELATED_P (tmp) = 1;
20328 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20329
20330 /* We need to maintain a sequence for DWARF info too. As dwarf info
20331 should not have PC, skip PC. */
20332 if (i != PC_REGNUM)
20333 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20334
20335 j++;
20336 }
20337
20338 if (return_in_pc)
20339 par = emit_jump_insn (par);
20340 else
20341 par = emit_insn (par);
20342
20343 REG_NOTES (par) = dwarf;
20344 if (!return_in_pc)
20345 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20346 stack_pointer_rtx, stack_pointer_rtx);
20347 }
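/* Illustration only: for a mask of {r4, r5, pc} the parallel built above
   is normally matched as a single "pop {r4, r5, pc}" (with the load into
   PC doubling as the return), and the REG_CFA_RESTORE notes cover r4 and
   r5 but deliberately not PC.  */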
20348
20349 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20350 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20351
20352 Unfortunately, since this insn does not reflect very well the actual
20353 semantics of the operation, we need to annotate the insn for the benefit
20354 of DWARF2 frame unwind information. */
20355 static void
20356 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20357 {
20358 int i, j;
20359 rtx par;
20360 rtx dwarf = NULL_RTX;
20361 rtx tmp, reg;
20362
20363 gcc_assert (num_regs && num_regs <= 32);
20364
20365 /* Workaround ARM10 VFPr1 bug. */
20366 if (num_regs == 2 && !arm_arch6)
20367 {
20368 if (first_reg == 15)
20369 first_reg--;
20370
20371 num_regs++;
20372 }
20373
20374 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20375 there could be up to 32 D-registers to restore.
20376 If there are more than 16 D-registers, make two recursive calls,
20377 each of which emits one pop_multi instruction. */
20378 if (num_regs > 16)
20379 {
20380 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20381 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20382 return;
20383 }
20384
20385 /* The parallel needs to hold num_regs SETs
20386 and one SET for the stack update. */
20387 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20388
20389 /* Increment the stack pointer, based on there being
20390 num_regs 8-byte registers to restore. */
20391 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20392 RTX_FRAME_RELATED_P (tmp) = 1;
20393 XVECEXP (par, 0, 0) = tmp;
20394
20395 /* Now show every reg that will be restored, using a SET for each. */
20396 for (j = 0, i=first_reg; j < num_regs; i += 2)
20397 {
20398 reg = gen_rtx_REG (DFmode, i);
20399
20400 tmp = gen_rtx_SET (reg,
20401 gen_frame_mem
20402 (DFmode,
20403 plus_constant (Pmode, base_reg, 8 * j)));
20404 RTX_FRAME_RELATED_P (tmp) = 1;
20405 XVECEXP (par, 0, j + 1) = tmp;
20406
20407 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20408
20409 j++;
20410 }
20411
20412 par = emit_insn (par);
20413 REG_NOTES (par) = dwarf;
20414
20415 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20416 if (REGNO (base_reg) == IP_REGNUM)
20417 {
20418 RTX_FRAME_RELATED_P (par) = 1;
20419 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20420 }
20421 else
20422 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20423 base_reg, base_reg);
20424 }
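/* Illustration only: a pop of four D-registers starting at d8, with the
   stack pointer as BASE_REG, is normally matched to something like
   "vldm sp!, {d8-d11}" (fldmfdd in older syntax), after which the base
   register has been advanced by 8 * num_regs = 32 bytes, matching the
   CFA note added above.  */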
20425
20426 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20427 even number of registers is being popped, LDRD patterns are created for
20428 all register pairs. If an odd number of registers is popped, the last
20429 register is loaded using an LDR pattern. */
20430 static void
20431 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20432 {
20433 int num_regs = 0;
20434 int i, j;
20435 rtx par = NULL_RTX;
20436 rtx dwarf = NULL_RTX;
20437 rtx tmp, reg, tmp1;
20438 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20439
20440 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20441 if (saved_regs_mask & (1 << i))
20442 num_regs++;
20443
20444 gcc_assert (num_regs && num_regs <= 16);
20445
20446 /* We cannot generate an LDRD for PC, so reduce the count if PC is to be
20447 popped. If num_regs was even it now becomes odd, and the leftover register
20448 is popped together with PC by a multi-register pop. If num_regs was odd it
20449 becomes even, and an LDR with return can be generated for PC. */
20450 if (return_in_pc)
20451 num_regs--;
20452
20453 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20454
20455 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20456 the index of the saved register in the stack frame. A PARALLEL RTX for
20457 each register pair is created here, so that the LDRD pattern can be
20458 matched. As PC is always the last register to be popped, and we have
20459 already decremented num_regs if PC is present, we don't have to worry
20460 about PC in this loop. */
20461 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20462 if (saved_regs_mask & (1 << j))
20463 {
20464 /* Create RTX for memory load. */
20465 reg = gen_rtx_REG (SImode, j);
20466 tmp = gen_rtx_SET (reg,
20467 gen_frame_mem (SImode,
20468 plus_constant (Pmode,
20469 stack_pointer_rtx, 4 * i)));
20470 RTX_FRAME_RELATED_P (tmp) = 1;
20471
20472 if (i % 2 == 0)
20473 {
20474 /* When saved-register index (i) is even, the RTX to be emitted is
20475 yet to be created. Hence create it first. The LDRD pattern we
20476 are generating is :
20477 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20478 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20479 where target registers need not be consecutive. */
20480 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20481 dwarf = NULL_RTX;
20482 }
20483
20484 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20485 added as 0th element and if i is odd, reg_i is added as 1st element
20486 of LDRD pattern shown above. */
20487 XVECEXP (par, 0, (i % 2)) = tmp;
20488 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20489
20490 if ((i % 2) == 1)
20491 {
20492 /* When saved-register index (i) is odd, RTXs for both the registers
20493 to be loaded are generated in above given LDRD pattern, and the
20494 pattern can be emitted now. */
20495 par = emit_insn (par);
20496 REG_NOTES (par) = dwarf;
20497 RTX_FRAME_RELATED_P (par) = 1;
20498 }
20499
20500 i++;
20501 }
20502
20503 /* If the number of registers popped is odd and return_in_pc is false, or
20504 the number of registers is even and return_in_pc is true, the last
20505 register is popped using LDR. It can be PC as well. Hence, adjust the
20506 stack first and then emit the LDR with post-increment. */
20507
20508 /* Increment the stack pointer, based on there being
20509 num_regs 4-byte registers to restore. */
20510 tmp = gen_rtx_SET (stack_pointer_rtx,
20511 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20512 RTX_FRAME_RELATED_P (tmp) = 1;
20513 tmp = emit_insn (tmp);
20514 if (!return_in_pc)
20515 {
20516 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20517 stack_pointer_rtx, stack_pointer_rtx);
20518 }
20519
20520 dwarf = NULL_RTX;
20521
20522 if (((num_regs % 2) == 1 && !return_in_pc)
20523 || ((num_regs % 2) == 0 && return_in_pc))
20524 {
20525 /* Scan for the single register to be popped. Skip until the saved
20526 register is found. */
20527 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20528
20529 /* Gen LDR with post increment here. */
20530 tmp1 = gen_rtx_MEM (SImode,
20531 gen_rtx_POST_INC (SImode,
20532 stack_pointer_rtx));
20533 set_mem_alias_set (tmp1, get_frame_alias_set ());
20534
20535 reg = gen_rtx_REG (SImode, j);
20536 tmp = gen_rtx_SET (reg, tmp1);
20537 RTX_FRAME_RELATED_P (tmp) = 1;
20538 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20539
20540 if (return_in_pc)
20541 {
20542 /* If return_in_pc, j must be PC_REGNUM. */
20543 gcc_assert (j == PC_REGNUM);
20544 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20545 XVECEXP (par, 0, 0) = ret_rtx;
20546 XVECEXP (par, 0, 1) = tmp;
20547 par = emit_jump_insn (par);
20548 }
20549 else
20550 {
20551 par = emit_insn (tmp);
20552 REG_NOTES (par) = dwarf;
20553 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20554 stack_pointer_rtx, stack_pointer_rtx);
20555 }
20556
20557 }
20558 else if ((num_regs % 2) == 1 && return_in_pc)
20559 {
20560 /* There are two registers left to be popped. Generate the
20561 pop_multiple_with_stack_update_and_return pattern to pop into PC. */
20562 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20563 }
20564
20565 return;
20566 }
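/* Illustration only: for a mask of {r4, r5, r6} (odd count, no PC) the
   function above emits roughly

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the exact mnemonics depending on the ldrd/ldr patterns matched.  */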
20567
20568 /* LDRD in ARM mode needs consecutive registers as operands. This function
20569 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20570 offset addressing and then generates one separate stack update. This provides
20571 more scheduling freedom, compared to writeback on every load. However,
20572 if the function returns using load into PC directly
20573 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20574 before the last load. TODO: Add a peephole optimization to recognize
20575 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20576 peephole optimization to merge the load at stack-offset zero
20577 with the stack update instruction using load with writeback
20578 in post-index addressing mode. */
20579 static void
20580 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20581 {
20582 int j = 0;
20583 int offset = 0;
20584 rtx par = NULL_RTX;
20585 rtx dwarf = NULL_RTX;
20586 rtx tmp, mem;
20587
20588 /* Restore saved registers. */
20589 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20590 j = 0;
20591 while (j <= LAST_ARM_REGNUM)
20592 if (saved_regs_mask & (1 << j))
20593 {
20594 if ((j % 2) == 0
20595 && (saved_regs_mask & (1 << (j + 1)))
20596 && (j + 1) != PC_REGNUM)
20597 {
20598 /* Current register and next register form register pair for which
20599 LDRD can be generated. PC is always the last register popped, and
20600 we handle it separately. */
20601 if (offset > 0)
20602 mem = gen_frame_mem (DImode,
20603 plus_constant (Pmode,
20604 stack_pointer_rtx,
20605 offset));
20606 else
20607 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20608
20609 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20610 tmp = emit_insn (tmp);
20611 RTX_FRAME_RELATED_P (tmp) = 1;
20612
20613 /* Generate dwarf info. */
20614
20615 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20616 gen_rtx_REG (SImode, j),
20617 NULL_RTX);
20618 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20619 gen_rtx_REG (SImode, j + 1),
20620 dwarf);
20621
20622 REG_NOTES (tmp) = dwarf;
20623
20624 offset += 8;
20625 j += 2;
20626 }
20627 else if (j != PC_REGNUM)
20628 {
20629 /* Emit a single word load. */
20630 if (offset > 0)
20631 mem = gen_frame_mem (SImode,
20632 plus_constant (Pmode,
20633 stack_pointer_rtx,
20634 offset));
20635 else
20636 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20637
20638 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20639 tmp = emit_insn (tmp);
20640 RTX_FRAME_RELATED_P (tmp) = 1;
20641
20642 /* Generate dwarf info. */
20643 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20644 gen_rtx_REG (SImode, j),
20645 NULL_RTX);
20646
20647 offset += 4;
20648 j += 1;
20649 }
20650 else /* j == PC_REGNUM */
20651 j++;
20652 }
20653 else
20654 j++;
20655
20656 /* Update the stack. */
20657 if (offset > 0)
20658 {
20659 tmp = gen_rtx_SET (stack_pointer_rtx,
20660 plus_constant (Pmode,
20661 stack_pointer_rtx,
20662 offset));
20663 tmp = emit_insn (tmp);
20664 arm_add_cfa_adjust_cfa_note (tmp, offset,
20665 stack_pointer_rtx, stack_pointer_rtx);
20666 offset = 0;
20667 }
20668
20669 if (saved_regs_mask & (1 << PC_REGNUM))
20670 {
20671 /* Only PC is to be popped. */
20672 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20673 XVECEXP (par, 0, 0) = ret_rtx;
20674 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20675 gen_frame_mem (SImode,
20676 gen_rtx_POST_INC (SImode,
20677 stack_pointer_rtx)));
20678 RTX_FRAME_RELATED_P (tmp) = 1;
20679 XVECEXP (par, 0, 1) = tmp;
20680 par = emit_jump_insn (par);
20681
20682 /* Generate dwarf info. */
20683 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20684 gen_rtx_REG (SImode, PC_REGNUM),
20685 NULL_RTX);
20686 REG_NOTES (par) = dwarf;
20687 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20688 stack_pointer_rtx, stack_pointer_rtx);
20689 }
20690 }
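/* Illustration only: for a mask of {r4, r5, r6, pc} the sequence emitted
   above is roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   where the final load into PC also performs the return.  */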
20691
20692 /* Calculate the size of the return value that is passed in registers. */
20693 static unsigned
20694 arm_size_return_regs (void)
20695 {
20696 machine_mode mode;
20697
20698 if (crtl->return_rtx != 0)
20699 mode = GET_MODE (crtl->return_rtx);
20700 else
20701 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20702
20703 return GET_MODE_SIZE (mode);
20704 }
20705
20706 /* Return true if the current function needs to save/restore LR. */
20707 static bool
20708 thumb_force_lr_save (void)
20709 {
20710 return !cfun->machine->lr_save_eliminated
20711 && (!crtl->is_leaf
20712 || thumb_far_jump_used_p ()
20713 || df_regs_ever_live_p (LR_REGNUM));
20714 }
20715
20716 /* We do not know whether r3 will be available, because an
20717 indirect tail call is happening in this
20718 particular case. */
20719 static bool
20720 is_indirect_tailcall_p (rtx call)
20721 {
20722 rtx pat = PATTERN (call);
20723
20724 /* Indirect tail call. */
20725 pat = XVECEXP (pat, 0, 0);
20726 if (GET_CODE (pat) == SET)
20727 pat = SET_SRC (pat);
20728
20729 pat = XEXP (XEXP (pat, 0), 0);
20730 return REG_P (pat);
20731 }
20732
20733 /* Return true if r3 is used by any of the tail call insns in the
20734 current function. */
20735 static bool
20736 any_sibcall_could_use_r3 (void)
20737 {
20738 edge_iterator ei;
20739 edge e;
20740
20741 if (!crtl->tail_call_emit)
20742 return false;
20743 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20744 if (e->flags & EDGE_SIBCALL)
20745 {
20746 rtx_insn *call = BB_END (e->src);
20747 if (!CALL_P (call))
20748 call = prev_nonnote_nondebug_insn (call);
20749 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20750 if (find_regno_fusage (call, USE, 3)
20751 || is_indirect_tailcall_p (call))
20752 return true;
20753 }
20754 return false;
20755 }
20756
20757
20758 /* Compute the distance from register FROM to register TO.
20759 These can be the arg pointer (26), the soft frame pointer (25),
20760 the stack pointer (13) or the hard frame pointer (11).
20761 In Thumb mode r7 is used as the hard frame pointer, if needed.
20762 Typical stack layout looks like this:
20763
20764 old stack pointer -> | |
20765 ----
20766 | | \
20767 | | saved arguments for
20768 | | vararg functions
20769 | | /
20770 --
20771 hard FP & arg pointer -> | | \
20772 | | stack
20773 | | frame
20774 | | /
20775 --
20776 | | \
20777 | | call saved
20778 | | registers
20779 soft frame pointer -> | | /
20780 --
20781 | | \
20782 | | local
20783 | | variables
20784 locals base pointer -> | | /
20785 --
20786 | | \
20787 | | outgoing
20788 | | arguments
20789 current stack pointer -> | | /
20790 --
20791
20792 For a given function some or all of these stack components
20793 may not be needed, giving rise to the possibility of
20794 eliminating some of the registers.
20795
20796 The values returned by this function must reflect the behavior
20797 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20798
20799 The sign of the number returned reflects the direction of stack
20800 growth, so the values are positive for all eliminations except
20801 from the soft frame pointer to the hard frame pointer.
20802
20803 SFP may point just inside the local variables block to ensure correct
20804 alignment. */
20805
20806
20807 /* Return cached stack offsets. */
20808
20809 static arm_stack_offsets *
20810 arm_get_frame_offsets (void)
20811 {
20812 struct arm_stack_offsets *offsets;
20813
20814 offsets = &cfun->machine->stack_offsets;
20815
20816 return offsets;
20817 }
20818
20819
20820 /* Calculate stack offsets. These are used to calculate register elimination
20821 offsets and in prologue/epilogue code. Also calculates which registers
20822 should be saved. */
20823
20824 static void
20825 arm_compute_frame_layout (void)
20826 {
20827 struct arm_stack_offsets *offsets;
20828 unsigned long func_type;
20829 int saved;
20830 int core_saved;
20831 HOST_WIDE_INT frame_size;
20832 int i;
20833
20834 offsets = &cfun->machine->stack_offsets;
20835
20836 /* Initially this is the size of the local variables. It will be translated
20837 into an offset once we have determined the size of the preceding data. */
20838 frame_size = ROUND_UP_WORD (get_frame_size ());
20839
20840 /* Space for variadic functions. */
20841 offsets->saved_args = crtl->args.pretend_args_size;
20842
20843 /* In Thumb mode this is incorrect, but never used. */
20844 offsets->frame
20845 = (offsets->saved_args
20846 + arm_compute_static_chain_stack_bytes ()
20847 + (frame_pointer_needed ? 4 : 0));
20848
20849 if (TARGET_32BIT)
20850 {
20851 unsigned int regno;
20852
20853 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20854 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20855 saved = core_saved;
20856
20857 /* We know that SP will be doubleword aligned on entry, and we must
20858 preserve that condition at any subroutine call. We also require the
20859 soft frame pointer to be doubleword aligned. */
20860
20861 if (TARGET_REALLY_IWMMXT)
20862 {
20863 /* Check for the call-saved iWMMXt registers. */
20864 for (regno = FIRST_IWMMXT_REGNUM;
20865 regno <= LAST_IWMMXT_REGNUM;
20866 regno++)
20867 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20868 saved += 8;
20869 }
20870
20871 func_type = arm_current_func_type ();
20872 /* Space for saved VFP registers. */
20873 if (! IS_VOLATILE (func_type)
20874 && TARGET_HARD_FLOAT)
20875 saved += arm_get_vfp_saved_size ();
20876 }
20877 else /* TARGET_THUMB1 */
20878 {
20879 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20880 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20881 saved = core_saved;
20882 if (TARGET_BACKTRACE)
20883 saved += 16;
20884 }
20885
20886 /* Saved registers include the stack frame. */
20887 offsets->saved_regs
20888 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20889 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20890
20891 /* A leaf function does not need any stack alignment if it has nothing
20892 on the stack. */
20893 if (crtl->is_leaf && frame_size == 0
20894 /* However if it calls alloca(), we have a dynamically allocated
20895 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20896 && ! cfun->calls_alloca)
20897 {
20898 offsets->outgoing_args = offsets->soft_frame;
20899 offsets->locals_base = offsets->soft_frame;
20900 return;
20901 }
20902
20903 /* Ensure SFP has the correct alignment. */
20904 if (ARM_DOUBLEWORD_ALIGN
20905 && (offsets->soft_frame & 7))
20906 {
20907 offsets->soft_frame += 4;
20908 /* Try to align stack by pushing an extra reg. Don't bother doing this
20909 when there is a stack frame as the alignment will be rolled into
20910 the normal stack adjustment. */
20911 if (frame_size + crtl->outgoing_args_size == 0)
20912 {
20913 int reg = -1;
20914
20915 /* Register r3 is caller-saved. Normally it does not need to be
20916 saved on entry by the prologue. However if we choose to save
20917 it for padding then we may confuse the compiler into thinking
20918 a prologue sequence is required when in fact it is not. This
20919 will occur when shrink-wrapping if r3 is used as a scratch
20920 register and there are no other callee-saved writes.
20921
20922 This situation can be avoided when other callee-saved registers
20923 are available and r3 is not mandatory if we choose a callee-saved
20924 register for padding. */
20925 bool prefer_callee_reg_p = false;
20926
20927 /* If it is safe to use r3, then do so. This sometimes
20928 generates better code on Thumb-2 by avoiding the need to
20929 use 32-bit push/pop instructions. */
20930 if (! any_sibcall_could_use_r3 ()
20931 && arm_size_return_regs () <= 12
20932 && (offsets->saved_regs_mask & (1 << 3)) == 0
20933 && (TARGET_THUMB2
20934 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20935 {
20936 reg = 3;
20937 if (!TARGET_THUMB2)
20938 prefer_callee_reg_p = true;
20939 }
20940 if (reg == -1
20941 || prefer_callee_reg_p)
20942 {
20943 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20944 {
20945 /* Avoid fixed registers; they may be changed at
20946 arbitrary times so it's unsafe to restore them
20947 during the epilogue. */
20948 if (!fixed_regs[i]
20949 && (offsets->saved_regs_mask & (1 << i)) == 0)
20950 {
20951 reg = i;
20952 break;
20953 }
20954 }
20955 }
20956
20957 if (reg != -1)
20958 {
20959 offsets->saved_regs += 4;
20960 offsets->saved_regs_mask |= (1 << reg);
20961 }
20962 }
20963 }
20964
20965 offsets->locals_base = offsets->soft_frame + frame_size;
20966 offsets->outgoing_args = (offsets->locals_base
20967 + crtl->outgoing_args_size);
20968
20969 if (ARM_DOUBLEWORD_ALIGN)
20970 {
20971 /* Ensure SP remains doubleword aligned. */
20972 if (offsets->outgoing_args & 7)
20973 offsets->outgoing_args += 4;
20974 gcc_assert (!(offsets->outgoing_args & 7));
20975 }
20976 }
20977
20978
20979 /* Calculate the relative offsets for the different stack pointers. Positive
20980 offsets are in the direction of stack growth. */
20981
20982 HOST_WIDE_INT
20983 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20984 {
20985 arm_stack_offsets *offsets;
20986
20987 offsets = arm_get_frame_offsets ();
20988
20989 /* OK, now we have enough information to compute the distances.
20990 There must be an entry in these switch tables for each pair
20991 of registers in ELIMINABLE_REGS, even if some of the entries
20992 seem to be redundant or useless. */
20993 switch (from)
20994 {
20995 case ARG_POINTER_REGNUM:
20996 switch (to)
20997 {
20998 case THUMB_HARD_FRAME_POINTER_REGNUM:
20999 return 0;
21000
21001 case FRAME_POINTER_REGNUM:
21002 /* This is the reverse of the soft frame pointer
21003 to hard frame pointer elimination below. */
21004 return offsets->soft_frame - offsets->saved_args;
21005
21006 case ARM_HARD_FRAME_POINTER_REGNUM:
21007 /* This is only non-zero in the case where the static chain register
21008 is stored above the frame. */
21009 return offsets->frame - offsets->saved_args - 4;
21010
21011 case STACK_POINTER_REGNUM:
21012 /* If nothing has been pushed on the stack at all
21013 then this will return -4. This *is* correct! */
21014 return offsets->outgoing_args - (offsets->saved_args + 4);
21015
21016 default:
21017 gcc_unreachable ();
21018 }
21019 gcc_unreachable ();
21020
21021 case FRAME_POINTER_REGNUM:
21022 switch (to)
21023 {
21024 case THUMB_HARD_FRAME_POINTER_REGNUM:
21025 return 0;
21026
21027 case ARM_HARD_FRAME_POINTER_REGNUM:
21028 /* The hard frame pointer points to the top entry in the
21029 stack frame. The soft frame pointer to the bottom entry
21030 in the stack frame. If there is no stack frame at all,
21031 then they are identical. */
21032
21033 return offsets->frame - offsets->soft_frame;
21034
21035 case STACK_POINTER_REGNUM:
21036 return offsets->outgoing_args - offsets->soft_frame;
21037
21038 default:
21039 gcc_unreachable ();
21040 }
21041 gcc_unreachable ();
21042
21043 default:
21044 /* You cannot eliminate from the stack pointer.
21045 In theory you could eliminate from the hard frame
21046 pointer to the stack pointer, but this will never
21047 happen, since if a stack frame is not needed the
21048 hard frame pointer will never be used. */
21049 gcc_unreachable ();
21050 }
21051 }
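/* Worked example, with illustrative figures only (assuming no pretend
   args, no static chain slot, a zero caller-interworking slot and no
   extra alignment padding): an ARM-mode function that saves
   {r4, r5, r6, lr} (16 bytes), has 8 bytes of locals and no outgoing
   arguments gets saved_args = 0, saved_regs = 16, soft_frame = 16,
   locals_base = 24 and outgoing_args = 24.  The switch above then yields

     ARG_POINTER   -> FRAME_POINTER : 16 - 0  = 16
     ARG_POINTER   -> STACK_POINTER : 24 - 4  = 20
     FRAME_POINTER -> STACK_POINTER : 24 - 16 = 8
*/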
21052
21053 /* Given FROM and TO register numbers, say whether this elimination is
21054 allowed. Frame pointer elimination is automatically handled.
21055
21056 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21057 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21058 pointer, we must eliminate FRAME_POINTER_REGNUM into
21059 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21060 ARG_POINTER_REGNUM. */
21061
21062 bool
21063 arm_can_eliminate (const int from, const int to)
21064 {
21065 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21066 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21067 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21068 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21069 true);
21070 }
21071
21072 /* Emit RTL to save coprocessor registers on function entry. Returns the
21073 number of bytes pushed. */
21074
21075 static int
21076 arm_save_coproc_regs(void)
21077 {
21078 int saved_size = 0;
21079 unsigned reg;
21080 unsigned start_reg;
21081 rtx insn;
21082
21083 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21084 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21085 {
21086 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21087 insn = gen_rtx_MEM (V2SImode, insn);
21088 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21089 RTX_FRAME_RELATED_P (insn) = 1;
21090 saved_size += 8;
21091 }
21092
21093 if (TARGET_HARD_FLOAT)
21094 {
21095 start_reg = FIRST_VFP_REGNUM;
21096
21097 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21098 {
21099 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21100 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21101 {
21102 if (start_reg != reg)
21103 saved_size += vfp_emit_fstmd (start_reg,
21104 (reg - start_reg) / 2);
21105 start_reg = reg + 2;
21106 }
21107 }
21108 if (start_reg != reg)
21109 saved_size += vfp_emit_fstmd (start_reg,
21110 (reg - start_reg) / 2);
21111 }
21112 return saved_size;
21113 }
21114
21115
21116 /* Set the Thumb frame pointer from the stack pointer. */
21117
21118 static void
21119 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21120 {
21121 HOST_WIDE_INT amount;
21122 rtx insn, dwarf;
21123
21124 amount = offsets->outgoing_args - offsets->locals_base;
21125 if (amount < 1024)
21126 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21127 stack_pointer_rtx, GEN_INT (amount)));
21128 else
21129 {
21130 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21131 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21132 expects the first two operands to be the same. */
21133 if (TARGET_THUMB2)
21134 {
21135 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21136 stack_pointer_rtx,
21137 hard_frame_pointer_rtx));
21138 }
21139 else
21140 {
21141 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21142 hard_frame_pointer_rtx,
21143 stack_pointer_rtx));
21144 }
21145 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21146 plus_constant (Pmode, stack_pointer_rtx, amount));
21147 RTX_FRAME_RELATED_P (dwarf) = 1;
21148 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21149 }
21150
21151 RTX_FRAME_RELATED_P (insn) = 1;
21152 }
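/* Illustration only: if the hard frame pointer is r7 and AMOUNT is 16,
   the fast path above emits something like "add r7, sp, #16".  For a
   larger amount the constant is first moved into the frame pointer
   register and sp is then added to it, with the REG_FRAME_RELATED_EXPR
   note describing the combined effect as a single sp-relative set.  */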
21153
21154 struct scratch_reg {
21155 rtx reg;
21156 bool saved;
21157 };
21158
21159 /* Return a short-lived scratch register for use as a 2nd scratch register on
21160 function entry after the registers are saved in the prologue. This register
21161 must be released by means of release_scratch_register_on_entry. IP is not
21162 considered since it is always used as the 1st scratch register if available.
21163
21164 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21165 mask of live registers. */
21166
21167 static void
21168 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21169 unsigned long live_regs)
21170 {
21171 int regno = -1;
21172
21173 sr->saved = false;
21174
21175 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21176 regno = LR_REGNUM;
21177 else
21178 {
21179 unsigned int i;
21180
21181 for (i = 4; i < 11; i++)
21182 if (regno1 != i && (live_regs & (1 << i)) != 0)
21183 {
21184 regno = i;
21185 break;
21186 }
21187
21188 if (regno < 0)
21189 {
21190 /* If IP is used as the 1st scratch register for a nested function,
21191 then either r3 wasn't available or it is used to preserve IP. */
21192 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21193 regno1 = 3;
21194 regno = (regno1 == 3 ? 2 : 3);
21195 sr->saved
21196 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21197 regno);
21198 }
21199 }
21200
21201 sr->reg = gen_rtx_REG (SImode, regno);
21202 if (sr->saved)
21203 {
21204 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21205 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21206 rtx x = gen_rtx_SET (stack_pointer_rtx,
21207 plus_constant (Pmode, stack_pointer_rtx, -4));
21208 RTX_FRAME_RELATED_P (insn) = 1;
21209 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21210 }
21211 }
21212
21213 /* Release a scratch register obtained from the preceding function. */
21214
21215 static void
21216 release_scratch_register_on_entry (struct scratch_reg *sr)
21217 {
21218 if (sr->saved)
21219 {
21220 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21221 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21222 rtx x = gen_rtx_SET (stack_pointer_rtx,
21223 plus_constant (Pmode, stack_pointer_rtx, 4));
21224 RTX_FRAME_RELATED_P (insn) = 1;
21225 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21226 }
21227 }
21228
21229 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21230
21231 #if PROBE_INTERVAL > 4096
21232 #error Cannot use indexed addressing mode for stack probing
21233 #endif
21234
21235 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21236 inclusive. These are offsets from the current stack pointer. REGNO1
21237 is the index number of the 1st scratch register and LIVE_REGS is the
21238 mask of live registers. */
21239
21240 static void
21241 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21242 unsigned int regno1, unsigned long live_regs)
21243 {
21244 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21245
21246 /* See if we have a constant small number of probes to generate. If so,
21247 that's the easy case. */
21248 if (size <= PROBE_INTERVAL)
21249 {
21250 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21251 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21252 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21253 }
21254
21255 /* The run-time loop is made up of 10 insns in the generic case while the
21256 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
21257 else if (size <= 5 * PROBE_INTERVAL)
21258 {
21259 HOST_WIDE_INT i, rem;
21260
21261 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21262 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21263 emit_stack_probe (reg1);
21264
21265 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21266 it exceeds SIZE. If only two probes are needed, this will not
21267 generate any code. Then probe at FIRST + SIZE. */
21268 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21269 {
21270 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21271 emit_stack_probe (reg1);
21272 }
21273
21274 rem = size - (i - PROBE_INTERVAL);
21275 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21276 {
21277 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21278 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21279 }
21280 else
21281 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21282 }
21283
21284 /* Otherwise, do the same as above, but in a loop. Note that we must be
21285 extra careful with variables wrapping around because we might be at
21286 the very top (or the very bottom) of the address space and we have
21287 to be able to handle this case properly; in particular, we use an
21288 equality test for the loop condition. */
21289 else
21290 {
21291 HOST_WIDE_INT rounded_size;
21292 struct scratch_reg sr;
21293
21294 get_scratch_register_on_entry (&sr, regno1, live_regs);
21295
21296 emit_move_insn (reg1, GEN_INT (first));
21297
21298
21299 /* Step 1: round SIZE to the previous multiple of the interval. */
21300
21301 rounded_size = size & -PROBE_INTERVAL;
21302 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21303
21304
21305 /* Step 2: compute initial and final value of the loop counter. */
21306
21307 /* TEST_ADDR = SP + FIRST. */
21308 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21309
21310 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21311 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21312
21313
21314 /* Step 3: the loop
21315
21316 do
21317 {
21318 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21319 probe at TEST_ADDR
21320 }
21321 while (TEST_ADDR != LAST_ADDR)
21322
21323 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21324 until it is equal to ROUNDED_SIZE. */
21325
21326 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21327
21328
21329 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21330 that SIZE is equal to ROUNDED_SIZE. */
21331
21332 if (size != rounded_size)
21333 {
21334 HOST_WIDE_INT rem = size - rounded_size;
21335
21336 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21337 {
21338 emit_set_insn (sr.reg,
21339 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21340 emit_stack_probe (plus_constant (Pmode, sr.reg,
21341 PROBE_INTERVAL - rem));
21342 }
21343 else
21344 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21345 }
21346
21347 release_scratch_register_on_entry (&sr);
21348 }
21349
21350 /* Make sure nothing is scheduled before we are done. */
21351 emit_insn (gen_blockage ());
21352 }
21353
21354 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21355 absolute addresses. */
21356
21357 const char *
21358 output_probe_stack_range (rtx reg1, rtx reg2)
21359 {
21360 static int labelno = 0;
21361 char loop_lab[32];
21362 rtx xops[2];
21363
21364 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21365
21366 /* Loop. */
21367 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21368
21369 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21370 xops[0] = reg1;
21371 xops[1] = GEN_INT (PROBE_INTERVAL);
21372 output_asm_insn ("sub\t%0, %0, %1", xops);
21373
21374 /* Probe at TEST_ADDR. */
21375 output_asm_insn ("str\tr0, [%0, #0]", xops);
21376
21377 /* Test if TEST_ADDR == LAST_ADDR. */
21378 xops[1] = reg2;
21379 output_asm_insn ("cmp\t%0, %1", xops);
21380
21381 /* Branch. */
21382 fputs ("\tbne\t", asm_out_file);
21383 assemble_name_raw (asm_out_file, loop_lab);
21384 fputc ('\n', asm_out_file);
21385
21386 return "";
21387 }
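/* With PROBE_INTERVAL at its usual value of 4096 and scratch registers
   r4/r5 (register numbers and label spelling are illustrative), the loop
   emitted above looks like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/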
21388
21389 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21390 function. */
21391 void
21392 arm_expand_prologue (void)
21393 {
21394 rtx amount;
21395 rtx insn;
21396 rtx ip_rtx;
21397 unsigned long live_regs_mask;
21398 unsigned long func_type;
21399 int fp_offset = 0;
21400 int saved_pretend_args = 0;
21401 int saved_regs = 0;
21402 unsigned HOST_WIDE_INT args_to_push;
21403 HOST_WIDE_INT size;
21404 arm_stack_offsets *offsets;
21405 bool clobber_ip;
21406
21407 func_type = arm_current_func_type ();
21408
21409 /* Naked functions don't have prologues. */
21410 if (IS_NAKED (func_type))
21411 {
21412 if (flag_stack_usage_info)
21413 current_function_static_stack_size = 0;
21414 return;
21415 }
21416
21417 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21418 args_to_push = crtl->args.pretend_args_size;
21419
21420 /* Compute which registers we will have to save onto the stack. */
21421 offsets = arm_get_frame_offsets ();
21422 live_regs_mask = offsets->saved_regs_mask;
21423
21424 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21425
21426 if (IS_STACKALIGN (func_type))
21427 {
21428 rtx r0, r1;
21429
21430 /* Handle a word-aligned stack pointer. We generate the following:
21431
21432 mov r0, sp
21433 bic r1, r0, #7
21434 mov sp, r1
21435 <save and restore r0 in normal prologue/epilogue>
21436 mov sp, r0
21437 bx lr
21438
21439 The unwinder doesn't need to know about the stack realignment.
21440 Just tell it we saved SP in r0. */
21441 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21442
21443 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21444 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21445
21446 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21447 RTX_FRAME_RELATED_P (insn) = 1;
21448 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21449
21450 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21451
21452 /* ??? The CFA changes here, which may cause GDB to conclude that it
21453 has entered a different function. That said, the unwind info is
21454 correct, individually, before and after this instruction because
21455 we've described the save of SP, which will override the default
21456 handling of SP as restoring from the CFA. */
21457 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21458 }
21459
21460 /* The static chain register is the same as the IP register. If it is
21461 clobbered when creating the frame, we need to save and restore it. */
21462 clobber_ip = IS_NESTED (func_type)
21463 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21464 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21465 && !df_regs_ever_live_p (LR_REGNUM)
21466 && arm_r3_live_at_start_p ()));
21467
21468 /* Find somewhere to store IP whilst the frame is being created.
21469 We try the following places in order:
21470
21471 1. The last argument register r3 if it is available.
21472 2. A slot on the stack above the frame if there are no
21473 arguments to push onto the stack.
21474 3. Register r3 again, after pushing the argument registers
21475 onto the stack, if this is a varargs function.
21476 4. The last slot on the stack created for the arguments to
21477 push, if this isn't a varargs function.
21478
21479 Note - we only need to tell the dwarf2 backend about the SP
21480 adjustment in the second variant; the static chain register
21481 doesn't need to be unwound, as it doesn't contain a value
21482 inherited from the caller. */
21483 if (clobber_ip)
21484 {
21485 if (!arm_r3_live_at_start_p ())
21486 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21487 else if (args_to_push == 0)
21488 {
21489 rtx addr, dwarf;
21490
21491 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21492 saved_regs += 4;
21493
21494 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21495 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21496 fp_offset = 4;
21497
21498 /* Just tell the dwarf backend that we adjusted SP. */
21499 dwarf = gen_rtx_SET (stack_pointer_rtx,
21500 plus_constant (Pmode, stack_pointer_rtx,
21501 -fp_offset));
21502 RTX_FRAME_RELATED_P (insn) = 1;
21503 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21504 }
21505 else
21506 {
21507 /* Store the args on the stack. */
21508 if (cfun->machine->uses_anonymous_args)
21509 {
21510 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21511 (0xf0 >> (args_to_push / 4)) & 0xf);
21512 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21513 saved_pretend_args = 1;
21514 }
21515 else
21516 {
21517 rtx addr, dwarf;
21518
21519 if (args_to_push == 4)
21520 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21521 else
21522 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21523 plus_constant (Pmode,
21524 stack_pointer_rtx,
21525 -args_to_push));
21526
21527 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21528
21529 /* Just tell the dwarf backend that we adjusted SP. */
21530 dwarf = gen_rtx_SET (stack_pointer_rtx,
21531 plus_constant (Pmode, stack_pointer_rtx,
21532 -args_to_push));
21533 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21534 }
21535
21536 RTX_FRAME_RELATED_P (insn) = 1;
21537 fp_offset = args_to_push;
21538 args_to_push = 0;
21539 }
21540 }
21541
21542 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21543 {
21544 if (IS_INTERRUPT (func_type))
21545 {
21546 /* Interrupt functions must not corrupt any registers.
21547 Creating a frame pointer however, corrupts the IP
21548 register, so we must push it first. */
21549 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21550
21551 /* Do not set RTX_FRAME_RELATED_P on this insn.
21552 The dwarf stack unwinding code only wants to see one
21553 stack decrement per function, and this is not it. If
21554 this instruction is labeled as being part of the frame
21555 creation sequence then dwarf2out_frame_debug_expr will
21556 die when it encounters the assignment of IP to FP
21557 later on, since the use of SP here establishes SP as
21558 the CFA register and not IP.
21559
21560 Anyway this instruction is not really part of the stack
21561 frame creation although it is part of the prologue. */
21562 }
21563
21564 insn = emit_set_insn (ip_rtx,
21565 plus_constant (Pmode, stack_pointer_rtx,
21566 fp_offset));
21567 RTX_FRAME_RELATED_P (insn) = 1;
21568 }
21569
21570 if (args_to_push)
21571 {
21572 /* Push the argument registers, or reserve space for them. */
21573 if (cfun->machine->uses_anonymous_args)
21574 insn = emit_multi_reg_push
21575 ((0xf0 >> (args_to_push / 4)) & 0xf,
21576 (0xf0 >> (args_to_push / 4)) & 0xf);
21577 else
21578 insn = emit_insn
21579 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21580 GEN_INT (- args_to_push)));
21581 RTX_FRAME_RELATED_P (insn) = 1;
21582 }
21583
21584 /* If this is an interrupt service routine, and the link register
21585 is going to be pushed, and we are not generating the extra push
21586 of IP (needed when a frame pointer is required under the APCS
21587 frame layout), then subtracting four from LR now will mean that
21588 the function return can be done with a single instruction. */
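/* (For an IRQ or FIQ the return address is LR minus four, so
   pre-adjusting LR here lets the epilogue pop the saved value
   straight into PC instead of needing a separate SUBS PC, LR, #4.) */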
21589 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21590 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21591 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21592 && TARGET_ARM)
21593 {
21594 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21595
21596 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21597 }
21598
21599 if (live_regs_mask)
21600 {
21601 unsigned long dwarf_regs_mask = live_regs_mask;
21602
21603 saved_regs += bit_count (live_regs_mask) * 4;
21604 if (optimize_size && !frame_pointer_needed
21605 && saved_regs == offsets->saved_regs - offsets->saved_args)
21606 {
21607 /* If no coprocessor registers are being pushed and we don't have
21608 to worry about a frame pointer then push extra registers to
21609 create the stack frame. This is done in a way that does not
21610 alter the frame layout, so is independent of the epilogue. */
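/* For example, a function needing 8 bytes of such space with r0 and r1
   both dead can simply add r0 and r1 to the register push, avoiding a
   separate "sub sp, sp, #8". */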
21611 int n;
21612 int frame;
21613 n = 0;
21614 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21615 n++;
21616 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21617 if (frame && n * 4 >= frame)
21618 {
21619 n = frame / 4;
21620 live_regs_mask |= (1 << n) - 1;
21621 saved_regs += frame;
21622 }
21623 }
21624
21625 if (TARGET_LDRD
21626 && current_tune->prefer_ldrd_strd
21627 && !optimize_function_for_size_p (cfun))
21628 {
21629 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21630 if (TARGET_THUMB2)
21631 thumb2_emit_strd_push (live_regs_mask);
21632 else if (TARGET_ARM
21633 && !TARGET_APCS_FRAME
21634 && !IS_INTERRUPT (func_type))
21635 arm_emit_strd_push (live_regs_mask);
21636 else
21637 {
21638 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21639 RTX_FRAME_RELATED_P (insn) = 1;
21640 }
21641 }
21642 else
21643 {
21644 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21645 RTX_FRAME_RELATED_P (insn) = 1;
21646 }
21647 }
21648
21649 if (! IS_VOLATILE (func_type))
21650 saved_regs += arm_save_coproc_regs ();
21651
21652 if (frame_pointer_needed && TARGET_ARM)
21653 {
21654 /* Create the new frame pointer. */
21655 if (TARGET_APCS_FRAME)
21656 {
21657 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21658 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21659 RTX_FRAME_RELATED_P (insn) = 1;
21660 }
21661 else
21662 {
21663 insn = GEN_INT (saved_regs - (4 + fp_offset));
21664 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21665 stack_pointer_rtx, insn));
21666 RTX_FRAME_RELATED_P (insn) = 1;
21667 }
21668 }
21669
21670 size = offsets->outgoing_args - offsets->saved_args;
21671 if (flag_stack_usage_info)
21672 current_function_static_stack_size = size;
21673
21674 /* If this isn't an interrupt service routine and we have a frame, then do
21675 stack checking. We use IP as the first scratch register, except for the
21676 non-APCS nested functions if LR or r3 is available (see clobber_ip). */
21677 if (!IS_INTERRUPT (func_type)
21678 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21679 {
21680 unsigned int regno;
21681
21682 if (!IS_NESTED (func_type) || clobber_ip)
21683 regno = IP_REGNUM;
21684 else if (df_regs_ever_live_p (LR_REGNUM))
21685 regno = LR_REGNUM;
21686 else
21687 regno = 3;
21688
21689 if (crtl->is_leaf && !cfun->calls_alloca)
21690 {
21691 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21692 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21693 size - STACK_CHECK_PROTECT,
21694 regno, live_regs_mask);
21695 }
21696 else if (size > 0)
21697 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21698 regno, live_regs_mask);
21699 }
21700
21701 /* Recover the static chain register. */
21702 if (clobber_ip)
21703 {
21704 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21705 insn = gen_rtx_REG (SImode, 3);
21706 else
21707 {
21708 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21709 insn = gen_frame_mem (SImode, insn);
21710 }
21711 emit_set_insn (ip_rtx, insn);
21712 emit_insn (gen_force_register_use (ip_rtx));
21713 }
21714
21715 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21716 {
21717 /* This add can produce multiple insns for a large constant, so we
21718 need to get tricky. */
21719 rtx_insn *last = get_last_insn ();
21720
21721 amount = GEN_INT (offsets->saved_args + saved_regs
21722 - offsets->outgoing_args);
21723
21724 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21725 amount));
21726 do
21727 {
21728 last = last ? NEXT_INSN (last) : get_insns ();
21729 RTX_FRAME_RELATED_P (last) = 1;
21730 }
21731 while (last != insn);
21732
21733 /* If the frame pointer is needed, emit a special barrier that
21734 will prevent the scheduler from moving stores to the frame
21735 before the stack adjustment. */
21736 if (frame_pointer_needed)
21737 emit_insn (gen_stack_tie (stack_pointer_rtx,
21738 hard_frame_pointer_rtx));
21739 }
21740
21741
21742 if (frame_pointer_needed && TARGET_THUMB2)
21743 thumb_set_frame_pointer (offsets);
21744
21745 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21746 {
21747 unsigned long mask;
21748
21749 mask = live_regs_mask;
21750 mask &= THUMB2_WORK_REGS;
21751 if (!IS_NESTED (func_type))
21752 mask |= (1 << IP_REGNUM);
21753 arm_load_pic_register (mask);
21754 }
21755
21756 /* If we are profiling, make sure no instructions are scheduled before
21757 the call to mcount. Similarly if the user has requested no
21758 scheduling in the prolog. Similarly if we want non-call exceptions
21759 using the EABI unwinder, to prevent faulting instructions from being
21760 swapped with a stack adjustment. */
21761 if (crtl->profile || !TARGET_SCHED_PROLOG
21762 || (arm_except_unwind_info (&global_options) == UI_TARGET
21763 && cfun->can_throw_non_call_exceptions))
21764 emit_insn (gen_blockage ());
21765
21766 /* If the link register is being kept alive, with the return address in it,
21767 then make sure that it does not get reused by the ce2 pass. */
21768 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21769 cfun->machine->lr_save_eliminated = 1;
21770 }
21771 \f
21772 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21773 static void
21774 arm_print_condition (FILE *stream)
21775 {
21776 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21777 {
21778 /* Branch conversion is not implemented for Thumb-2. */
21779 if (TARGET_THUMB)
21780 {
21781 output_operand_lossage ("predicated Thumb instruction");
21782 return;
21783 }
21784 if (current_insn_predicate != NULL)
21785 {
21786 output_operand_lossage
21787 ("predicated instruction in conditional sequence");
21788 return;
21789 }
21790
21791 fputs (arm_condition_codes[arm_current_cc], stream);
21792 }
21793 else if (current_insn_predicate)
21794 {
21795 enum arm_cond_code code;
21796
21797 if (TARGET_THUMB1)
21798 {
21799 output_operand_lossage ("predicated Thumb instruction");
21800 return;
21801 }
21802
21803 code = get_arm_condition_code (current_insn_predicate);
21804 fputs (arm_condition_codes[code], stream);
21805 }
21806 }
21807
21808
21809 /* Globally reserved letters: acln
21810 Punctuation letters currently used: @_|?().!#
21811 Lower case letters currently used: bcdefhimpqtvwxyz
21812 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21813 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21814
21815 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21816
21817 If CODE is 'd', then the X is a condition operand and the instruction
21818 should only be executed if the condition is true.
21819 If CODE is 'D', then the X is a condition operand and the instruction
21820 should only be executed if the condition is false: however, if the mode
21821 of the comparison is CCFPEmode, then always execute the instruction -- we
21822 do this because in these circumstances !GE does not necessarily imply LT;
21823 in these cases the instruction pattern will take care to make sure that
21824 an instruction containing %d will follow, thereby undoing the effects of
21825 doing this instruction unconditionally.
21826 If CODE is 'N' then X is a floating point operand that must be negated
21827 before output.
21828 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21829 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
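/* For example, %M applied to a DImode value held in r4 prints "{r4-r5}",
   and %B applied to the constant 0 prints -1. */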
21830 static void
21831 arm_print_operand (FILE *stream, rtx x, int code)
21832 {
21833 switch (code)
21834 {
21835 case '@':
21836 fputs (ASM_COMMENT_START, stream);
21837 return;
21838
21839 case '_':
21840 fputs (user_label_prefix, stream);
21841 return;
21842
21843 case '|':
21844 fputs (REGISTER_PREFIX, stream);
21845 return;
21846
21847 case '?':
21848 arm_print_condition (stream);
21849 return;
21850
21851 case '.':
21852 /* The current condition code for a condition code setting instruction.
21853 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21854 fputc ('s', stream);
21855 arm_print_condition (stream);
21856 return;
21857
21858 case '!':
21859 /* If the instruction is conditionally executed then print
21860 the current condition code, otherwise print 's'. */
21861 gcc_assert (TARGET_THUMB2);
21862 if (current_insn_predicate)
21863 arm_print_condition (stream);
21864 else
21865 fputc ('s', stream);
21866 break;
21867
21868 /* %# is a "break" sequence. It doesn't output anything, but is used to
21869 separate e.g. operand numbers from following text, if that text consists
21870 of further digits which we don't want to be part of the operand
21871 number. */
21872 case '#':
21873 return;
21874
21875 case 'N':
21876 {
21877 REAL_VALUE_TYPE r;
21878 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21879 fprintf (stream, "%s", fp_const_from_val (&r));
21880 }
21881 return;
21882
21883 /* An integer or symbol address without a preceding # sign. */
21884 case 'c':
21885 switch (GET_CODE (x))
21886 {
21887 case CONST_INT:
21888 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21889 break;
21890
21891 case SYMBOL_REF:
21892 output_addr_const (stream, x);
21893 break;
21894
21895 case CONST:
21896 if (GET_CODE (XEXP (x, 0)) == PLUS
21897 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21898 {
21899 output_addr_const (stream, x);
21900 break;
21901 }
21902 /* Fall through. */
21903
21904 default:
21905 output_operand_lossage ("Unsupported operand for code '%c'", code);
21906 }
21907 return;
21908
21909 /* An integer that we want to print in HEX. */
21910 case 'x':
21911 switch (GET_CODE (x))
21912 {
21913 case CONST_INT:
21914 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21915 break;
21916
21917 default:
21918 output_operand_lossage ("Unsupported operand for code '%c'", code);
21919 }
21920 return;
21921
21922 case 'B':
21923 if (CONST_INT_P (x))
21924 {
21925 HOST_WIDE_INT val;
21926 val = ARM_SIGN_EXTEND (~INTVAL (x));
21927 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21928 }
21929 else
21930 {
21931 putc ('~', stream);
21932 output_addr_const (stream, x);
21933 }
21934 return;
21935
21936 case 'b':
21937 /* Print the log2 of a CONST_INT. */
21938 {
21939 HOST_WIDE_INT val;
21940
21941 if (!CONST_INT_P (x)
21942 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21943 output_operand_lossage ("Unsupported operand for code '%c'", code);
21944 else
21945 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21946 }
21947 return;
21948
21949 case 'L':
21950 /* The low 16 bits of an immediate constant. */
21951 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21952 return;
21953
21954 case 'i':
21955 fprintf (stream, "%s", arithmetic_instr (x, 1));
21956 return;
21957
21958 case 'I':
21959 fprintf (stream, "%s", arithmetic_instr (x, 0));
21960 return;
21961
21962 case 'S':
21963 {
21964 HOST_WIDE_INT val;
21965 const char *shift;
21966
21967 shift = shift_op (x, &val);
21968
21969 if (shift)
21970 {
21971 fprintf (stream, ", %s ", shift);
21972 if (val == -1)
21973 arm_print_operand (stream, XEXP (x, 1), 0);
21974 else
21975 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21976 }
21977 }
21978 return;
21979
21980 /* An explanation of the 'Q', 'R' and 'H' register operands:
21981
21982 In a pair of registers containing a DI or DF value the 'Q'
21983 operand returns the register number of the register containing
21984 the least significant part of the value. The 'R' operand returns
21985 the register number of the register containing the most
21986 significant part of the value.
21987
21988 The 'H' operand returns the higher of the two register numbers.
21989 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21990 same as the 'Q' operand, since the most significant part of the
21991 value is held in the lower number register. The reverse is true
21992 on systems where WORDS_BIG_ENDIAN is false.
21993
21994 The purpose of these operands is to distinguish between cases
21995 where the endian-ness of the values is important (for example
21996 when they are added together), and cases where the endian-ness
21997 is irrelevant, but the order of register operations is important.
21998 For example when loading a value from memory into a register
21999 pair, the endian-ness does not matter. Provided that the value
22000 from the lower memory address is put into the lower numbered
22001 register, and the value from the higher address is put into the
22002 higher numbered register, the load will work regardless of whether
22003 the value being loaded is big-wordian or little-wordian. The
22004 order of the two register loads can matter however, if the address
22005 of the memory location is actually held in one of the registers
22006 being overwritten by the load.
22007
22008 The 'Q' and 'R' operand codes are also available for 64-bit
22009 constants. */
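/* For example, with a DImode value held in {r2, r3} on a little-endian
   target %Q prints r2, %R prints r3 and %H prints r3; when
   WORDS_BIG_ENDIAN is true %Q prints r3 and %R prints r2, while %H
   still prints r3. */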
22010 case 'Q':
22011 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22012 {
22013 rtx part = gen_lowpart (SImode, x);
22014 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22015 return;
22016 }
22017
22018 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22019 {
22020 output_operand_lossage ("invalid operand for code '%c'", code);
22021 return;
22022 }
22023
22024 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22025 return;
22026
22027 case 'R':
22028 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22029 {
22030 machine_mode mode = GET_MODE (x);
22031 rtx part;
22032
22033 if (mode == VOIDmode)
22034 mode = DImode;
22035 part = gen_highpart_mode (SImode, mode, x);
22036 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22037 return;
22038 }
22039
22040 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22041 {
22042 output_operand_lossage ("invalid operand for code '%c'", code);
22043 return;
22044 }
22045
22046 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22047 return;
22048
22049 case 'H':
22050 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22051 {
22052 output_operand_lossage ("invalid operand for code '%c'", code);
22053 return;
22054 }
22055
22056 asm_fprintf (stream, "%r", REGNO (x) + 1);
22057 return;
22058
22059 case 'J':
22060 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22061 {
22062 output_operand_lossage ("invalid operand for code '%c'", code);
22063 return;
22064 }
22065
22066 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22067 return;
22068
22069 case 'K':
22070 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22071 {
22072 output_operand_lossage ("invalid operand for code '%c'", code);
22073 return;
22074 }
22075
22076 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22077 return;
22078
22079 case 'm':
22080 asm_fprintf (stream, "%r",
22081 REG_P (XEXP (x, 0))
22082 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22083 return;
22084
22085 case 'M':
22086 asm_fprintf (stream, "{%r-%r}",
22087 REGNO (x),
22088 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22089 return;
22090
22091 /* Like 'M', but writing doubleword vector registers, for use by Neon
22092 insns. */
22093 case 'h':
22094 {
22095 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22096 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22097 if (numregs == 1)
22098 asm_fprintf (stream, "{d%d}", regno);
22099 else
22100 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22101 }
22102 return;
22103
22104 case 'd':
22105 /* CONST_TRUE_RTX means always -- that's the default. */
22106 if (x == const_true_rtx)
22107 return;
22108
22109 if (!COMPARISON_P (x))
22110 {
22111 output_operand_lossage ("invalid operand for code '%c'", code);
22112 return;
22113 }
22114
22115 fputs (arm_condition_codes[get_arm_condition_code (x)],
22116 stream);
22117 return;
22118
22119 case 'D':
22120 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22121 want to do that. */
22122 if (x == const_true_rtx)
22123 {
22124 output_operand_lossage ("instruction never executed");
22125 return;
22126 }
22127 if (!COMPARISON_P (x))
22128 {
22129 output_operand_lossage ("invalid operand for code '%c'", code);
22130 return;
22131 }
22132
22133 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22134 (get_arm_condition_code (x))],
22135 stream);
22136 return;
22137
22138 case 's':
22139 case 'V':
22140 case 'W':
22141 case 'X':
22142 case 'Y':
22143 case 'Z':
22144 /* Former Maverick support, removed after GCC-4.7. */
22145 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22146 return;
22147
22148 case 'U':
22149 if (!REG_P (x)
22150 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22151 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22152 /* Bad value for wCG register number. */
22153 {
22154 output_operand_lossage ("invalid operand for code '%c'", code);
22155 return;
22156 }
22157
22158 else
22159 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22160 return;
22161
22162 /* Print an iWMMXt control register name. */
22163 case 'w':
22164 if (!CONST_INT_P (x)
22165 || INTVAL (x) < 0
22166 || INTVAL (x) >= 16)
22167 /* Bad value for wC register number. */
22168 {
22169 output_operand_lossage ("invalid operand for code '%c'", code);
22170 return;
22171 }
22172
22173 else
22174 {
22175 static const char * wc_reg_names [16] =
22176 {
22177 "wCID", "wCon", "wCSSF", "wCASF",
22178 "wC4", "wC5", "wC6", "wC7",
22179 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22180 "wC12", "wC13", "wC14", "wC15"
22181 };
22182
22183 fputs (wc_reg_names [INTVAL (x)], stream);
22184 }
22185 return;
22186
22187 /* Print the high single-precision register of a VFP double-precision
22188 register. */
22189 case 'p':
22190 {
22191 machine_mode mode = GET_MODE (x);
22192 int regno;
22193
22194 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22195 {
22196 output_operand_lossage ("invalid operand for code '%c'", code);
22197 return;
22198 }
22199
22200 regno = REGNO (x);
22201 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22202 {
22203 output_operand_lossage ("invalid operand for code '%c'", code);
22204 return;
22205 }
22206
22207 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22208 }
22209 return;
22210
22211 /* Print a VFP/Neon double precision or quad precision register name. */
22212 case 'P':
22213 case 'q':
22214 {
22215 machine_mode mode = GET_MODE (x);
22216 int is_quad = (code == 'q');
22217 int regno;
22218
22219 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22220 {
22221 output_operand_lossage ("invalid operand for code '%c'", code);
22222 return;
22223 }
22224
22225 if (!REG_P (x)
22226 || !IS_VFP_REGNUM (REGNO (x)))
22227 {
22228 output_operand_lossage ("invalid operand for code '%c'", code);
22229 return;
22230 }
22231
22232 regno = REGNO (x);
22233 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22234 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22235 {
22236 output_operand_lossage ("invalid operand for code '%c'", code);
22237 return;
22238 }
22239
22240 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22241 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22242 }
22243 return;
22244
22245 /* These two codes print the low/high doubleword register of a Neon quad
22246 register, respectively. For pair-structure types, can also print
22247 low/high quadword registers. */
22248 case 'e':
22249 case 'f':
22250 {
22251 machine_mode mode = GET_MODE (x);
22252 int regno;
22253
22254 if ((GET_MODE_SIZE (mode) != 16
22255 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22256 {
22257 output_operand_lossage ("invalid operand for code '%c'", code);
22258 return;
22259 }
22260
22261 regno = REGNO (x);
22262 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22263 {
22264 output_operand_lossage ("invalid operand for code '%c'", code);
22265 return;
22266 }
22267
22268 if (GET_MODE_SIZE (mode) == 16)
22269 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22270 + (code == 'f' ? 1 : 0));
22271 else
22272 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22273 + (code == 'f' ? 1 : 0));
22274 }
22275 return;
22276
22277 /* Print a VFPv3 floating-point constant, represented as an integer
22278 index. */
22279 case 'G':
22280 {
22281 int index = vfp3_const_double_index (x);
22282 gcc_assert (index != -1);
22283 fprintf (stream, "%d", index);
22284 }
22285 return;
22286
22287 /* Print bits representing opcode features for Neon.
22288
22289 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22290 and polynomials as unsigned.
22291
22292 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22293
22294 Bit 2 is 1 for rounding functions, 0 otherwise. */
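/* For example, a bits value of 2 (unsigned polynomial) prints 'p' for
   %T, 'p' for %F and 'u' for %t, while a value of 3 (float) prints 'f'
   for all three. */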
22295
22296 /* Identify the type as 's', 'u', 'p' or 'f'. */
22297 case 'T':
22298 {
22299 HOST_WIDE_INT bits = INTVAL (x);
22300 fputc ("uspf"[bits & 3], stream);
22301 }
22302 return;
22303
22304 /* Likewise, but signed and unsigned integers are both 'i'. */
22305 case 'F':
22306 {
22307 HOST_WIDE_INT bits = INTVAL (x);
22308 fputc ("iipf"[bits & 3], stream);
22309 }
22310 return;
22311
22312 /* As for 'T', but emit 'u' instead of 'p'. */
22313 case 't':
22314 {
22315 HOST_WIDE_INT bits = INTVAL (x);
22316 fputc ("usuf"[bits & 3], stream);
22317 }
22318 return;
22319
22320 /* Bit 2: rounding (vs none). */
22321 case 'O':
22322 {
22323 HOST_WIDE_INT bits = INTVAL (x);
22324 fputs ((bits & 4) != 0 ? "r" : "", stream);
22325 }
22326 return;
22327
22328 /* Memory operand for vld1/vst1 instruction. */
22329 case 'A':
22330 {
22331 rtx addr;
22332 bool postinc = false;
22333 rtx postinc_reg = NULL;
22334 unsigned align, memsize, align_bits;
22335
22336 gcc_assert (MEM_P (x));
22337 addr = XEXP (x, 0);
22338 if (GET_CODE (addr) == POST_INC)
22339 {
22340 postinc = true;
22341 addr = XEXP (addr, 0);
22342 }
22343 if (GET_CODE (addr) == POST_MODIFY)
22344 {
22345 postinc_reg = XEXP (XEXP (addr, 1), 1);
22346 addr = XEXP (addr, 0);
22347 }
22348 asm_fprintf (stream, "[%r", REGNO (addr));
22349
22350 /* We know the alignment of this access, so we can emit a hint in the
22351 instruction (for some alignments) as an aid to the memory subsystem
22352 of the target. */
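/* For example, a 16-byte access known to be 16-byte aligned is given a
   ":128" hint, printing something like "[r0:128]". */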
22353 align = MEM_ALIGN (x) >> 3;
22354 memsize = MEM_SIZE (x);
22355
22356 /* Only certain alignment specifiers are supported by the hardware. */
22357 if (memsize == 32 && (align % 32) == 0)
22358 align_bits = 256;
22359 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22360 align_bits = 128;
22361 else if (memsize >= 8 && (align % 8) == 0)
22362 align_bits = 64;
22363 else
22364 align_bits = 0;
22365
22366 if (align_bits != 0)
22367 asm_fprintf (stream, ":%d", align_bits);
22368
22369 asm_fprintf (stream, "]");
22370
22371 if (postinc)
22372 fputs ("!", stream);
22373 if (postinc_reg)
22374 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22375 }
22376 return;
22377
22378 case 'C':
22379 {
22380 rtx addr;
22381
22382 gcc_assert (MEM_P (x));
22383 addr = XEXP (x, 0);
22384 gcc_assert (REG_P (addr));
22385 asm_fprintf (stream, "[%r]", REGNO (addr));
22386 }
22387 return;
22388
22389 /* Translate an S register number into a D register number and element index. */
22390 case 'y':
22391 {
22392 machine_mode mode = GET_MODE (x);
22393 int regno;
22394
22395 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22396 {
22397 output_operand_lossage ("invalid operand for code '%c'", code);
22398 return;
22399 }
22400
22401 regno = REGNO (x);
22402 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22403 {
22404 output_operand_lossage ("invalid operand for code '%c'", code);
22405 return;
22406 }
22407
22408 regno = regno - FIRST_VFP_REGNUM;
22409 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22410 }
22411 return;
22412
22413 case 'v':
22414 gcc_assert (CONST_DOUBLE_P (x));
22415 int result;
22416 result = vfp3_const_double_for_fract_bits (x);
22417 if (result == 0)
22418 result = vfp3_const_double_for_bits (x);
22419 fprintf (stream, "#%d", result);
22420 return;
22421
22422 /* Register specifier for vld1.16/vst1.16. Translate the S register
22423 number into a D register number and element index. */
22424 case 'z':
22425 {
22426 machine_mode mode = GET_MODE (x);
22427 int regno;
22428
22429 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22430 {
22431 output_operand_lossage ("invalid operand for code '%c'", code);
22432 return;
22433 }
22434
22435 regno = REGNO (x);
22436 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22437 {
22438 output_operand_lossage ("invalid operand for code '%c'", code);
22439 return;
22440 }
22441
22442 regno = regno - FIRST_VFP_REGNUM;
22443 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22444 }
22445 return;
22446
22447 default:
22448 if (x == 0)
22449 {
22450 output_operand_lossage ("missing operand");
22451 return;
22452 }
22453
22454 switch (GET_CODE (x))
22455 {
22456 case REG:
22457 asm_fprintf (stream, "%r", REGNO (x));
22458 break;
22459
22460 case MEM:
22461 output_address (GET_MODE (x), XEXP (x, 0));
22462 break;
22463
22464 case CONST_DOUBLE:
22465 {
22466 char fpstr[20];
22467 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22468 sizeof (fpstr), 0, 1);
22469 fprintf (stream, "#%s", fpstr);
22470 }
22471 break;
22472
22473 default:
22474 gcc_assert (GET_CODE (x) != NEG);
22475 fputc ('#', stream);
22476 if (GET_CODE (x) == HIGH)
22477 {
22478 fputs (":lower16:", stream);
22479 x = XEXP (x, 0);
22480 }
22481
22482 output_addr_const (stream, x);
22483 break;
22484 }
22485 }
22486 }
22487 \f
22488 /* Target hook for printing a memory address. */
22489 static void
22490 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22491 {
22492 if (TARGET_32BIT)
22493 {
22494 int is_minus = GET_CODE (x) == MINUS;
22495
22496 if (REG_P (x))
22497 asm_fprintf (stream, "[%r]", REGNO (x));
22498 else if (GET_CODE (x) == PLUS || is_minus)
22499 {
22500 rtx base = XEXP (x, 0);
22501 rtx index = XEXP (x, 1);
22502 HOST_WIDE_INT offset = 0;
22503 if (!REG_P (base)
22504 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22505 {
22506 /* Ensure that BASE is a register (one of them must be).
22507 Also ensure that SP is not used as an index
22508 register. */
22509 std::swap (base, index);
22510 }
22511 switch (GET_CODE (index))
22512 {
22513 case CONST_INT:
22514 offset = INTVAL (index);
22515 if (is_minus)
22516 offset = -offset;
22517 asm_fprintf (stream, "[%r, #%wd]",
22518 REGNO (base), offset);
22519 break;
22520
22521 case REG:
22522 asm_fprintf (stream, "[%r, %s%r]",
22523 REGNO (base), is_minus ? "-" : "",
22524 REGNO (index));
22525 break;
22526
22527 case MULT:
22528 case ASHIFTRT:
22529 case LSHIFTRT:
22530 case ASHIFT:
22531 case ROTATERT:
22532 {
22533 asm_fprintf (stream, "[%r, %s%r",
22534 REGNO (base), is_minus ? "-" : "",
22535 REGNO (XEXP (index, 0)));
22536 arm_print_operand (stream, index, 'S');
22537 fputs ("]", stream);
22538 break;
22539 }
22540
22541 default:
22542 gcc_unreachable ();
22543 }
22544 }
22545 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22546 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22547 {
22548 gcc_assert (REG_P (XEXP (x, 0)));
22549
22550 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22551 asm_fprintf (stream, "[%r, #%s%d]!",
22552 REGNO (XEXP (x, 0)),
22553 GET_CODE (x) == PRE_DEC ? "-" : "",
22554 GET_MODE_SIZE (mode));
22555 else
22556 asm_fprintf (stream, "[%r], #%s%d",
22557 REGNO (XEXP (x, 0)),
22558 GET_CODE (x) == POST_DEC ? "-" : "",
22559 GET_MODE_SIZE (mode));
22560 }
22561 else if (GET_CODE (x) == PRE_MODIFY)
22562 {
22563 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22564 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22565 asm_fprintf (stream, "#%wd]!",
22566 INTVAL (XEXP (XEXP (x, 1), 1)));
22567 else
22568 asm_fprintf (stream, "%r]!",
22569 REGNO (XEXP (XEXP (x, 1), 1)));
22570 }
22571 else if (GET_CODE (x) == POST_MODIFY)
22572 {
22573 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22574 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22575 asm_fprintf (stream, "#%wd",
22576 INTVAL (XEXP (XEXP (x, 1), 1)));
22577 else
22578 asm_fprintf (stream, "%r",
22579 REGNO (XEXP (XEXP (x, 1), 1)));
22580 }
22581 else output_addr_const (stream, x);
22582 }
22583 else
22584 {
22585 if (REG_P (x))
22586 asm_fprintf (stream, "[%r]", REGNO (x));
22587 else if (GET_CODE (x) == POST_INC)
22588 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22589 else if (GET_CODE (x) == PLUS)
22590 {
22591 gcc_assert (REG_P (XEXP (x, 0)));
22592 if (CONST_INT_P (XEXP (x, 1)))
22593 asm_fprintf (stream, "[%r, #%wd]",
22594 REGNO (XEXP (x, 0)),
22595 INTVAL (XEXP (x, 1)));
22596 else
22597 asm_fprintf (stream, "[%r, %r]",
22598 REGNO (XEXP (x, 0)),
22599 REGNO (XEXP (x, 1)));
22600 }
22601 else
22602 output_addr_const (stream, x);
22603 }
22604 }
22605 \f
22606 /* Target hook for indicating whether a punctuation character for
22607 TARGET_PRINT_OPERAND is valid. */
22608 static bool
22609 arm_print_operand_punct_valid_p (unsigned char code)
22610 {
22611 return (code == '@' || code == '|' || code == '.'
22612 || code == '(' || code == ')' || code == '#'
22613 || (TARGET_32BIT && (code == '?'))
22614 || (TARGET_THUMB2 && (code == '!'))
22615 || (TARGET_THUMB && (code == '_')));
22616 }
22617 \f
22618 /* Target hook for assembling integer objects. The ARM version needs to
22619 handle word-sized values specially. */
22620 static bool
22621 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22622 {
22623 machine_mode mode;
22624
22625 if (size == UNITS_PER_WORD && aligned_p)
22626 {
22627 fputs ("\t.word\t", asm_out_file);
22628 output_addr_const (asm_out_file, x);
22629
22630 /* Mark symbols as position independent. We only do this in the
22631 .text segment, not in the .data segment. */
22632 if (NEED_GOT_RELOC && flag_pic && making_const_table
22633 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22634 {
22635 /* See legitimize_pic_address for an explanation of the
22636 TARGET_VXWORKS_RTP check. */
22637 /* References to weak symbols cannot be resolved locally:
22638 they may be overridden by a non-weak definition at link
22639 time. */
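/* For example, this emits something like ".word sym(GOT)" for a global
   or weak symbol, and ".word sym(GOTOFF)" when the reference is known
   to be local and text-relative. */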
22640 if (!arm_pic_data_is_text_relative
22641 || (GET_CODE (x) == SYMBOL_REF
22642 && (!SYMBOL_REF_LOCAL_P (x)
22643 || (SYMBOL_REF_DECL (x)
22644 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22645 fputs ("(GOT)", asm_out_file);
22646 else
22647 fputs ("(GOTOFF)", asm_out_file);
22648 }
22649 fputc ('\n', asm_out_file);
22650 return true;
22651 }
22652
22653 mode = GET_MODE (x);
22654
22655 if (arm_vector_mode_supported_p (mode))
22656 {
22657 int i, units;
22658
22659 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22660
22661 units = CONST_VECTOR_NUNITS (x);
22662 size = GET_MODE_UNIT_SIZE (mode);
22663
22664 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22665 for (i = 0; i < units; i++)
22666 {
22667 rtx elt = CONST_VECTOR_ELT (x, i);
22668 assemble_integer
22669 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22670 }
22671 else
22672 for (i = 0; i < units; i++)
22673 {
22674 rtx elt = CONST_VECTOR_ELT (x, i);
22675 assemble_real
22676 (*CONST_DOUBLE_REAL_VALUE (elt),
22677 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22678 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22679 }
22680
22681 return true;
22682 }
22683
22684 return default_assemble_integer (x, size, aligned_p);
22685 }
22686
22687 static void
22688 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22689 {
22690 section *s;
22691
22692 if (!TARGET_AAPCS_BASED)
22693 {
22694 (is_ctor ?
22695 default_named_section_asm_out_constructor
22696 : default_named_section_asm_out_destructor) (symbol, priority);
22697 return;
22698 }
22699
22700 /* Put these in the .init_array section, using a special relocation. */
22701 if (priority != DEFAULT_INIT_PRIORITY)
22702 {
22703 char buf[18];
22704 sprintf (buf, "%s.%.5u",
22705 is_ctor ? ".init_array" : ".fini_array",
22706 priority);
22707 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22708 }
22709 else if (is_ctor)
22710 s = ctors_section;
22711 else
22712 s = dtors_section;
22713
22714 switch_to_section (s);
22715 assemble_align (POINTER_SIZE);
22716 fputs ("\t.word\t", asm_out_file);
22717 output_addr_const (asm_out_file, symbol);
22718 fputs ("(target1)\n", asm_out_file);
22719 }
22720
22721 /* Add a function to the list of static constructors. */
22722
22723 static void
22724 arm_elf_asm_constructor (rtx symbol, int priority)
22725 {
22726 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22727 }
22728
22729 /* Add a function to the list of static destructors. */
22730
22731 static void
22732 arm_elf_asm_destructor (rtx symbol, int priority)
22733 {
22734 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22735 }
22736 \f
22737 /* A finite state machine takes care of noticing whether or not instructions
22738 can be conditionally executed, and thus decrease execution time and code
22739 size by deleting branch instructions. The fsm is controlled by
22740 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22741
22742 /* The states of the fsm controlling condition codes are:
22743 0: normal, do nothing special
22744 1: make ASM_OUTPUT_OPCODE not output this instruction
22745 2: make ASM_OUTPUT_OPCODE not output this instruction
22746 3: make instructions conditional
22747 4: make instructions conditional
22748
22749 State transitions (state->state by whom under condition):
22750 0 -> 1 final_prescan_insn if the `target' is a label
22751 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22752 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22753 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22754 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22755 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22756 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22757 (the target insn is arm_target_insn).
22758
22759 If the jump clobbers the conditions then we use states 2 and 4.
22760
22761 A similar thing can be done with conditional return insns.
22762
22763 XXX In case the `target' is an unconditional branch, this conditionalising
22764 of the instructions always reduces code size, but not always execution
22765 time. But then, I want to reduce the code size to somewhere near what
22766 /bin/cc produces. */
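/* For example, a conditional branch around a single instruction,

	bne	.L1
	add	r0, r0, #1
   .L1:

   is rewritten by this fsm as the single predicated instruction

	addeq	r0, r0, #1

   The branch is suppressed (state 0 -> 1 -> 3) and the skipped
   instruction is given the inverse of the branch condition until the
   target label is reached (state 3 -> 0). */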
22767
22768 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22769 instructions. When a COND_EXEC instruction is seen the subsequent
22770 instructions are scanned so that multiple conditional instructions can be
22771 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22772 specify the length and true/false mask for the IT block. These will be
22773 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
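/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 0x5, thumb2_asm_output_opcode below prints
   "itet eq": the first and third instructions of the block execute if
   EQ and the second if NE. */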
22774
22775 /* Returns the index of the ARM condition code string in
22776 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22777 COMPARISON should be an rtx like `(eq (...) (...))'. */
22778
22779 enum arm_cond_code
22780 maybe_get_arm_condition_code (rtx comparison)
22781 {
22782 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22783 enum arm_cond_code code;
22784 enum rtx_code comp_code = GET_CODE (comparison);
22785
22786 if (GET_MODE_CLASS (mode) != MODE_CC)
22787 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22788 XEXP (comparison, 1));
22789
22790 switch (mode)
22791 {
22792 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22793 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22794 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22795 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22796 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22797 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22798 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22799 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22800 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22801 case E_CC_DLTUmode: code = ARM_CC;
22802
22803 dominance:
22804 if (comp_code == EQ)
22805 return ARM_INVERSE_CONDITION_CODE (code);
22806 if (comp_code == NE)
22807 return code;
22808 return ARM_NV;
22809
22810 case E_CC_NOOVmode:
22811 switch (comp_code)
22812 {
22813 case NE: return ARM_NE;
22814 case EQ: return ARM_EQ;
22815 case GE: return ARM_PL;
22816 case LT: return ARM_MI;
22817 default: return ARM_NV;
22818 }
22819
22820 case E_CC_Zmode:
22821 switch (comp_code)
22822 {
22823 case NE: return ARM_NE;
22824 case EQ: return ARM_EQ;
22825 default: return ARM_NV;
22826 }
22827
22828 case E_CC_Nmode:
22829 switch (comp_code)
22830 {
22831 case NE: return ARM_MI;
22832 case EQ: return ARM_PL;
22833 default: return ARM_NV;
22834 }
22835
22836 case E_CCFPEmode:
22837 case E_CCFPmode:
22838 /* We can handle all cases except UNEQ and LTGT. */
22839 switch (comp_code)
22840 {
22841 case GE: return ARM_GE;
22842 case GT: return ARM_GT;
22843 case LE: return ARM_LS;
22844 case LT: return ARM_MI;
22845 case NE: return ARM_NE;
22846 case EQ: return ARM_EQ;
22847 case ORDERED: return ARM_VC;
22848 case UNORDERED: return ARM_VS;
22849 case UNLT: return ARM_LT;
22850 case UNLE: return ARM_LE;
22851 case UNGT: return ARM_HI;
22852 case UNGE: return ARM_PL;
22853 /* UNEQ and LTGT do not have a representation. */
22854 case UNEQ: /* Fall through. */
22855 case LTGT: /* Fall through. */
22856 default: return ARM_NV;
22857 }
22858
22859 case E_CC_SWPmode:
22860 switch (comp_code)
22861 {
22862 case NE: return ARM_NE;
22863 case EQ: return ARM_EQ;
22864 case GE: return ARM_LE;
22865 case GT: return ARM_LT;
22866 case LE: return ARM_GE;
22867 case LT: return ARM_GT;
22868 case GEU: return ARM_LS;
22869 case GTU: return ARM_CC;
22870 case LEU: return ARM_CS;
22871 case LTU: return ARM_HI;
22872 default: return ARM_NV;
22873 }
22874
22875 case E_CC_Cmode:
22876 switch (comp_code)
22877 {
22878 case LTU: return ARM_CS;
22879 case GEU: return ARM_CC;
22880 case NE: return ARM_CS;
22881 case EQ: return ARM_CC;
22882 default: return ARM_NV;
22883 }
22884
22885 case E_CC_CZmode:
22886 switch (comp_code)
22887 {
22888 case NE: return ARM_NE;
22889 case EQ: return ARM_EQ;
22890 case GEU: return ARM_CS;
22891 case GTU: return ARM_HI;
22892 case LEU: return ARM_LS;
22893 case LTU: return ARM_CC;
22894 default: return ARM_NV;
22895 }
22896
22897 case E_CC_NCVmode:
22898 switch (comp_code)
22899 {
22900 case GE: return ARM_GE;
22901 case LT: return ARM_LT;
22902 case GEU: return ARM_CS;
22903 case LTU: return ARM_CC;
22904 default: return ARM_NV;
22905 }
22906
22907 case E_CC_Vmode:
22908 switch (comp_code)
22909 {
22910 case NE: return ARM_VS;
22911 case EQ: return ARM_VC;
22912 default: return ARM_NV;
22913 }
22914
22915 case E_CCmode:
22916 switch (comp_code)
22917 {
22918 case NE: return ARM_NE;
22919 case EQ: return ARM_EQ;
22920 case GE: return ARM_GE;
22921 case GT: return ARM_GT;
22922 case LE: return ARM_LE;
22923 case LT: return ARM_LT;
22924 case GEU: return ARM_CS;
22925 case GTU: return ARM_HI;
22926 case LEU: return ARM_LS;
22927 case LTU: return ARM_CC;
22928 default: return ARM_NV;
22929 }
22930
22931 default: gcc_unreachable ();
22932 }
22933 }
22934
22935 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22936 static enum arm_cond_code
22937 get_arm_condition_code (rtx comparison)
22938 {
22939 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22940 gcc_assert (code != ARM_NV);
22941 return code;
22942 }
22943
22944 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22945 code registers when not targeting Thumb1. The VFP condition register
22946 only exists when generating hard-float code. */
22947 static bool
22948 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22949 {
22950 if (!TARGET_32BIT)
22951 return false;
22952
22953 *p1 = CC_REGNUM;
22954 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22955 return true;
22956 }
22957
22958 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22959 instructions. */
22960 void
22961 thumb2_final_prescan_insn (rtx_insn *insn)
22962 {
22963 rtx_insn *first_insn = insn;
22964 rtx body = PATTERN (insn);
22965 rtx predicate;
22966 enum arm_cond_code code;
22967 int n;
22968 int mask;
22969 int max;
22970
22971 /* max_insns_skipped in the tune was already taken into account in the
22972 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22973 just emit the IT blocks as best we can. It does not make sense to split
22974 the IT blocks. */
22975 max = MAX_INSN_PER_IT_BLOCK;
22976
22977 /* Remove the previous insn from the count of insns to be output. */
22978 if (arm_condexec_count)
22979 arm_condexec_count--;
22980
22981 /* Nothing to do if we are already inside a conditional block. */
22982 if (arm_condexec_count)
22983 return;
22984
22985 if (GET_CODE (body) != COND_EXEC)
22986 return;
22987
22988 /* Conditional jumps are implemented directly. */
22989 if (JUMP_P (insn))
22990 return;
22991
22992 predicate = COND_EXEC_TEST (body);
22993 arm_current_cc = get_arm_condition_code (predicate);
22994
22995 n = get_attr_ce_count (insn);
22996 arm_condexec_count = 1;
22997 arm_condexec_mask = (1 << n) - 1;
22998 arm_condexec_masklen = n;
22999 /* See if subsequent instructions can be combined into the same block. */
23000 for (;;)
23001 {
23002 insn = next_nonnote_insn (insn);
23003
23004 /* Jumping into the middle of an IT block is illegal, so a label or
23005 barrier terminates the block. */
23006 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23007 break;
23008
23009 body = PATTERN (insn);
23010 /* USE and CLOBBER aren't really insns, so just skip them. */
23011 if (GET_CODE (body) == USE
23012 || GET_CODE (body) == CLOBBER)
23013 continue;
23014
23015 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23016 if (GET_CODE (body) != COND_EXEC)
23017 break;
23018 /* Maximum number of conditionally executed instructions in a block. */
23019 n = get_attr_ce_count (insn);
23020 if (arm_condexec_masklen + n > max)
23021 break;
23022
23023 predicate = COND_EXEC_TEST (body);
23024 code = get_arm_condition_code (predicate);
23025 mask = (1 << n) - 1;
23026 if (arm_current_cc == code)
23027 arm_condexec_mask |= (mask << arm_condexec_masklen);
23028 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23029 break;
23030
23031 arm_condexec_count++;
23032 arm_condexec_masklen += n;
23033
23034 /* A jump must be the last instruction in a conditional block. */
23035 if (JUMP_P (insn))
23036 break;
23037 }
23038 /* Restore recog_data (getting the attributes of other insns can
23039 destroy this array, but final.c assumes that it remains intact
23040 across this call). */
23041 extract_constrain_insn_cached (first_insn);
23042 }
23043
23044 void
23045 arm_final_prescan_insn (rtx_insn *insn)
23046 {
23047 /* BODY will hold the body of INSN. */
23048 rtx body = PATTERN (insn);
23049
23050 /* This will be 1 if we are trying to repeat the trick, and things need
23051 to be reversed if it appears to fail. */
23052 int reverse = 0;
23053
23054 /* If we start with a return insn, we only succeed if we find another one. */
23055 int seeking_return = 0;
23056 enum rtx_code return_code = UNKNOWN;
23057
23058 /* START_INSN will hold the insn from where we start looking. This is the
23059 first insn after the following code_label if REVERSE is true. */
23060 rtx_insn *start_insn = insn;
23061
23062 /* If in state 4, check if the target branch is reached, in order to
23063 change back to state 0. */
23064 if (arm_ccfsm_state == 4)
23065 {
23066 if (insn == arm_target_insn)
23067 {
23068 arm_target_insn = NULL;
23069 arm_ccfsm_state = 0;
23070 }
23071 return;
23072 }
23073
23074 /* If in state 3, it is possible to repeat the trick, if this insn is an
23075 unconditional branch to a label, and immediately following this branch
23076 is the previous target label which is only used once, and the label this
23077 branch jumps to is not too far off. */
23078 if (arm_ccfsm_state == 3)
23079 {
23080 if (simplejump_p (insn))
23081 {
23082 start_insn = next_nonnote_insn (start_insn);
23083 if (BARRIER_P (start_insn))
23084 {
23085 /* XXX Isn't this always a barrier? */
23086 start_insn = next_nonnote_insn (start_insn);
23087 }
23088 if (LABEL_P (start_insn)
23089 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23090 && LABEL_NUSES (start_insn) == 1)
23091 reverse = TRUE;
23092 else
23093 return;
23094 }
23095 else if (ANY_RETURN_P (body))
23096 {
23097 start_insn = next_nonnote_insn (start_insn);
23098 if (BARRIER_P (start_insn))
23099 start_insn = next_nonnote_insn (start_insn);
23100 if (LABEL_P (start_insn)
23101 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23102 && LABEL_NUSES (start_insn) == 1)
23103 {
23104 reverse = TRUE;
23105 seeking_return = 1;
23106 return_code = GET_CODE (body);
23107 }
23108 else
23109 return;
23110 }
23111 else
23112 return;
23113 }
23114
23115 gcc_assert (!arm_ccfsm_state || reverse);
23116 if (!JUMP_P (insn))
23117 return;
23118
23119 /* This jump might be paralleled with a clobber of the condition codes;
23120 the jump should always come first. */
23121 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23122 body = XVECEXP (body, 0, 0);
23123
23124 if (reverse
23125 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23126 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23127 {
23128 int insns_skipped;
23129 int fail = FALSE, succeed = FALSE;
23130 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23131 int then_not_else = TRUE;
23132 rtx_insn *this_insn = start_insn;
23133 rtx label = 0;
23134
23135 /* Register the insn jumped to. */
23136 if (reverse)
23137 {
23138 if (!seeking_return)
23139 label = XEXP (SET_SRC (body), 0);
23140 }
23141 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23142 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23143 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23144 {
23145 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23146 then_not_else = FALSE;
23147 }
23148 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23149 {
23150 seeking_return = 1;
23151 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23152 }
23153 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23154 {
23155 seeking_return = 1;
23156 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23157 then_not_else = FALSE;
23158 }
23159 else
23160 gcc_unreachable ();
23161
23162 /* See how many insns this branch skips, and what kind of insns. If all
23163 insns are okay, and the label or unconditional branch to the same
23164 label is not too far away, succeed. */
23165 for (insns_skipped = 0;
23166 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23167 {
23168 rtx scanbody;
23169
23170 this_insn = next_nonnote_insn (this_insn);
23171 if (!this_insn)
23172 break;
23173
23174 switch (GET_CODE (this_insn))
23175 {
23176 case CODE_LABEL:
23177 /* Succeed if it is the target label, otherwise fail since
23178 control falls in from somewhere else. */
23179 if (this_insn == label)
23180 {
23181 arm_ccfsm_state = 1;
23182 succeed = TRUE;
23183 }
23184 else
23185 fail = TRUE;
23186 break;
23187
23188 case BARRIER:
23189 /* Succeed if the following insn is the target label.
23190 Otherwise fail.
23191 If return insns are used then the last insn in a function
23192 will be a barrier. */
23193 this_insn = next_nonnote_insn (this_insn);
23194 if (this_insn && this_insn == label)
23195 {
23196 arm_ccfsm_state = 1;
23197 succeed = TRUE;
23198 }
23199 else
23200 fail = TRUE;
23201 break;
23202
23203 case CALL_INSN:
23204 /* The AAPCS says that conditional calls should not be
23205 used since they make interworking inefficient (the
23206 linker can't transform BL<cond> into BLX). That's
23207 only a problem if the machine has BLX. */
23208 if (arm_arch5)
23209 {
23210 fail = TRUE;
23211 break;
23212 }
23213
23214 /* Succeed if the following insn is the target label, or
23215 if the following two insns are a barrier and the
23216 target label. */
23217 this_insn = next_nonnote_insn (this_insn);
23218 if (this_insn && BARRIER_P (this_insn))
23219 this_insn = next_nonnote_insn (this_insn);
23220
23221 if (this_insn && this_insn == label
23222 && insns_skipped < max_insns_skipped)
23223 {
23224 arm_ccfsm_state = 1;
23225 succeed = TRUE;
23226 }
23227 else
23228 fail = TRUE;
23229 break;
23230
23231 case JUMP_INSN:
23232 /* If this is an unconditional branch to the same label, succeed.
23233 If it is to another label, do nothing. If it is conditional,
23234 fail. */
23235 /* XXX Probably, the tests for SET and the PC are
23236 unnecessary. */
23237
23238 scanbody = PATTERN (this_insn);
23239 if (GET_CODE (scanbody) == SET
23240 && GET_CODE (SET_DEST (scanbody)) == PC)
23241 {
23242 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23243 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23244 {
23245 arm_ccfsm_state = 2;
23246 succeed = TRUE;
23247 }
23248 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23249 fail = TRUE;
23250 }
23251 /* Fail if a conditional return is undesirable (e.g. on a
23252 StrongARM), but still allow this if optimizing for size. */
23253 else if (GET_CODE (scanbody) == return_code
23254 && !use_return_insn (TRUE, NULL)
23255 && !optimize_size)
23256 fail = TRUE;
23257 else if (GET_CODE (scanbody) == return_code)
23258 {
23259 arm_ccfsm_state = 2;
23260 succeed = TRUE;
23261 }
23262 else if (GET_CODE (scanbody) == PARALLEL)
23263 {
23264 switch (get_attr_conds (this_insn))
23265 {
23266 case CONDS_NOCOND:
23267 break;
23268 default:
23269 fail = TRUE;
23270 break;
23271 }
23272 }
23273 else
23274 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23275
23276 break;
23277
23278 case INSN:
23279 /* Instructions using or affecting the condition codes make it
23280 fail. */
23281 scanbody = PATTERN (this_insn);
23282 if (!(GET_CODE (scanbody) == SET
23283 || GET_CODE (scanbody) == PARALLEL)
23284 || get_attr_conds (this_insn) != CONDS_NOCOND)
23285 fail = TRUE;
23286 break;
23287
23288 default:
23289 break;
23290 }
23291 }
23292 if (succeed)
23293 {
23294 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23295 arm_target_label = CODE_LABEL_NUMBER (label);
23296 else
23297 {
23298 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23299
23300 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23301 {
23302 this_insn = next_nonnote_insn (this_insn);
23303 gcc_assert (!this_insn
23304 || (!BARRIER_P (this_insn)
23305 && !LABEL_P (this_insn)));
23306 }
23307 if (!this_insn)
23308 {
23309 /* Oh, dear! We ran off the end; give up. */
23310 extract_constrain_insn_cached (insn);
23311 arm_ccfsm_state = 0;
23312 arm_target_insn = NULL;
23313 return;
23314 }
23315 arm_target_insn = this_insn;
23316 }
23317
23318 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23319 what it was. */
23320 if (!reverse)
23321 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23322
23323 if (reverse || then_not_else)
23324 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23325 }
23326
23327 /* Restore recog_data (getting the attributes of other insns can
23328 destroy this array, but final.c assumes that it remains intact
23329 across this call). */
23330 extract_constrain_insn_cached (insn);
23331 }
23332 }
23333
23334 /* Output IT instructions. */
23335 void
23336 thumb2_asm_output_opcode (FILE * stream)
23337 {
23338 char buff[5];
23339 int n;
23340
23341 if (arm_condexec_mask)
23342 {
23343 for (n = 0; n < arm_condexec_masklen; n++)
23344 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23345 buff[n] = 0;
23346 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23347 arm_condition_codes[arm_current_cc]);
23348 arm_condexec_mask = 0;
23349 }
23350 }
23351
23352 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23353 static bool
23354 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23355 {
23356 if (GET_MODE_CLASS (mode) == MODE_CC)
23357 return (regno == CC_REGNUM
23358 || (TARGET_HARD_FLOAT
23359 && regno == VFPCC_REGNUM));
23360
23361 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23362 return false;
23363
23364 if (TARGET_THUMB1)
23365 /* For the Thumb we only allow values bigger than SImode in
23366 registers 0 - 6, so that there is always a second low
23367 register available to hold the upper part of the value.
23368 We probably ought to ensure that the register is the
23369 start of an even-numbered register pair. */
23370 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23371
23372 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23373 {
23374 if (mode == SFmode || mode == SImode)
23375 return VFP_REGNO_OK_FOR_SINGLE (regno);
23376
23377 if (mode == DFmode)
23378 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23379
23380 if (mode == HFmode)
23381 return VFP_REGNO_OK_FOR_SINGLE (regno);
23382
23383 /* VFP registers can hold HImode values. */
23384 if (mode == HImode)
23385 return VFP_REGNO_OK_FOR_SINGLE (regno);
23386
23387 if (TARGET_NEON)
23388 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23389 || (VALID_NEON_QREG_MODE (mode)
23390 && NEON_REGNO_OK_FOR_QUAD (regno))
23391 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23392 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23393 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23394 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23395 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23396
23397 return false;
23398 }
23399
23400 if (TARGET_REALLY_IWMMXT)
23401 {
23402 if (IS_IWMMXT_GR_REGNUM (regno))
23403 return mode == SImode;
23404
23405 if (IS_IWMMXT_REGNUM (regno))
23406 return VALID_IWMMXT_REG_MODE (mode);
23407 }
23408
23409 /* We allow almost any value to be stored in the general registers.
23410 Restrict doubleword quantities to even register pairs in ARM state
23411 so that we can use ldrd. Do not allow very large Neon structure
23412 opaque modes in general registers; they would use too many. */
23413 if (regno <= LAST_ARM_REGNUM)
23414 {
23415 if (ARM_NUM_REGS (mode) > 4)
23416 return false;
23417
23418 if (TARGET_THUMB2)
23419 return true;
23420
23421 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23422 }
23423
23424 if (regno == FRAME_POINTER_REGNUM
23425 || regno == ARG_POINTER_REGNUM)
23426 /* We only allow integers in the fake hard registers. */
23427 return GET_MODE_CLASS (mode) == MODE_INT;
23428
23429 return false;
23430 }
23431
23432 /* Implement TARGET_MODES_TIEABLE_P. */
23433
23434 static bool
23435 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23436 {
23437 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23438 return true;
23439
23440 /* We specifically want to allow elements of "structure" modes to
23441 be tieable to the structure. This more general condition allows
23442 other rarer situations too. */
23443 if (TARGET_NEON
23444 && (VALID_NEON_DREG_MODE (mode1)
23445 || VALID_NEON_QREG_MODE (mode1)
23446 || VALID_NEON_STRUCT_MODE (mode1))
23447 && (VALID_NEON_DREG_MODE (mode2)
23448 || VALID_NEON_QREG_MODE (mode2)
23449 || VALID_NEON_STRUCT_MODE (mode2)))
23450 return true;
23451
23452 return false;
23453 }
23454
23455 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23456 not used in arm mode. */
23457
23458 enum reg_class
23459 arm_regno_class (int regno)
23460 {
23461 if (regno == PC_REGNUM)
23462 return NO_REGS;
23463
23464 if (TARGET_THUMB1)
23465 {
23466 if (regno == STACK_POINTER_REGNUM)
23467 return STACK_REG;
23468 if (regno == CC_REGNUM)
23469 return CC_REG;
23470 if (regno < 8)
23471 return LO_REGS;
23472 return HI_REGS;
23473 }
23474
23475 if (TARGET_THUMB2 && regno < 8)
23476 return LO_REGS;
23477
23478 if ( regno <= LAST_ARM_REGNUM
23479 || regno == FRAME_POINTER_REGNUM
23480 || regno == ARG_POINTER_REGNUM)
23481 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23482
23483 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23484 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23485
23486 if (IS_VFP_REGNUM (regno))
23487 {
23488 if (regno <= D7_VFP_REGNUM)
23489 return VFP_D0_D7_REGS;
23490 else if (regno <= LAST_LO_VFP_REGNUM)
23491 return VFP_LO_REGS;
23492 else
23493 return VFP_HI_REGS;
23494 }
23495
23496 if (IS_IWMMXT_REGNUM (regno))
23497 return IWMMXT_REGS;
23498
23499 if (IS_IWMMXT_GR_REGNUM (regno))
23500 return IWMMXT_GR_REGS;
23501
23502 return NO_REGS;
23503 }
23504
23505 /* Handle a special case when computing the offset
23506 of an argument from the frame pointer. */
23507 int
23508 arm_debugger_arg_offset (int value, rtx addr)
23509 {
23510 rtx_insn *insn;
23511
23512 /* We are only interested if dbxout_parms() failed to compute the offset. */
23513 if (value != 0)
23514 return 0;
23515
23516 /* We can only cope with the case where the address is held in a register. */
23517 if (!REG_P (addr))
23518 return 0;
23519
23520 /* If we are using the frame pointer to point at the argument, then
23521 an offset of 0 is correct. */
23522 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23523 return 0;
23524
23525 /* If we are using the stack pointer to point at the
23526 argument, then an offset of 0 is correct. */
23527 /* ??? Check this is consistent with thumb2 frame layout. */
23528 if ((TARGET_THUMB || !frame_pointer_needed)
23529 && REGNO (addr) == SP_REGNUM)
23530 return 0;
23531
23532 /* Oh dear. The argument is pointed to by a register rather
23533 than being held in a register, or being stored at a known
23534 offset from the frame pointer. Since GDB only understands
23535 those two kinds of argument we must translate the address
23536 held in the register into an offset from the frame pointer.
23537 We do this by searching through the insns for the function
23538 looking to see where this register gets its value. If the
23539 register is initialized from the frame pointer plus an offset
23540 then we are in luck and we can continue, otherwise we give up.
23541
23542 This code is exercised by producing debugging information
23543 for a function with arguments like this:
23544
23545 double func (double a, double b, int c, double d) {return d;}
23546
23547 Without this code the stab for parameter 'd' will be set to
23548 an offset of 0 from the frame pointer, rather than 8. */
23549
23550 /* The if() statement says:
23551
23552 If the insn is a normal instruction
23553 and if the insn is setting the value in a register
23554 and if the register being set is the register holding the address of the argument
23555 and if the address is computed by an addition
23556 that involves adding to a register
23557 which is the frame pointer
23558 a constant integer
23559
23560 then... */
23561
23562 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23563 {
23564 if ( NONJUMP_INSN_P (insn)
23565 && GET_CODE (PATTERN (insn)) == SET
23566 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23567 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23568 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23569 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23570 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23571 )
23572 {
23573 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23574
23575 break;
23576 }
23577 }
23578
23579 if (value == 0)
23580 {
23581 debug_rtx (addr);
23582 warning (0, "unable to compute real location of stacked parameter");
23583 value = 8; /* XXX magic hack */
23584 }
23585
23586 return value;
23587 }
23588 \f
23589 /* Implement TARGET_PROMOTED_TYPE. */
23590
23591 static tree
23592 arm_promoted_type (const_tree t)
23593 {
23594 if (SCALAR_FLOAT_TYPE_P (t)
23595 && TYPE_PRECISION (t) == 16
23596 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23597 return float_type_node;
23598 return NULL_TREE;
23599 }
23600
23601 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23602 This simply adds HFmode as a supported mode; even though we don't
23603 implement arithmetic on this type directly, it's supported by
23604 optabs conversions, much the way the double-word arithmetic is
23605 special-cased in the default hook. */
23606
23607 static bool
23608 arm_scalar_mode_supported_p (scalar_mode mode)
23609 {
23610 if (mode == HFmode)
23611 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23612 else if (ALL_FIXED_POINT_MODE_P (mode))
23613 return true;
23614 else
23615 return default_scalar_mode_supported_p (mode);
23616 }
23617
23618 /* Set the value of FLT_EVAL_METHOD.
23619 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23620
23621 0: evaluate all operations and constants, whose semantic type has at
23622 most the range and precision of type float, to the range and
23623 precision of float; evaluate all other operations and constants to
23624 the range and precision of the semantic type;
23625
23626 N, where _FloatN is a supported interchange floating type
23627 evaluate all operations and constants, whose semantic type has at
23628 most the range and precision of _FloatN type, to the range and
23629 precision of the _FloatN type; evaluate all other operations and
23630 constants to the range and precision of the semantic type;
23631
23632 If we have the ARMv8.2-A extensions then we support _Float16 in native
23633 precision, so we should set this to 16. Otherwise, we support the type,
23634 but want to evaluate expressions in float precision, so set this to
23635 0. */
23636
23637 static enum flt_eval_method
23638 arm_excess_precision (enum excess_precision_type type)
23639 {
23640 switch (type)
23641 {
23642 case EXCESS_PRECISION_TYPE_FAST:
23643 case EXCESS_PRECISION_TYPE_STANDARD:
23644 /* We can calculate either in 16-bit range and precision or
23645 32-bit range and precision. Make that decision based on whether
23646 we have native support for the ARMv8.2-A 16-bit floating-point
23647 instructions or not. */
23648 return (TARGET_VFP_FP16INST
23649 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23650 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23651 case EXCESS_PRECISION_TYPE_IMPLICIT:
23652 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23653 default:
23654 gcc_unreachable ();
23655 }
23656 return FLT_EVAL_METHOD_UNPREDICTABLE;
23657 }
23658
23659
23660 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23661 _Float16 if we are using anything other than ieee format for 16-bit
23662 floating point. Otherwise, punt to the default implementation. */
23663 static opt_scalar_float_mode
23664 arm_floatn_mode (int n, bool extended)
23665 {
23666 if (!extended && n == 16)
23667 {
23668 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23669 return HFmode;
23670 return opt_scalar_float_mode ();
23671 }
23672
23673 return default_floatn_mode (n, extended);
23674 }
23675
23676
23677 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23678 not to early-clobber SRC registers in the process.
23679
23680 We assume that the operands described by SRC and DEST represent a
23681 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23682 number of components into which the copy has been decomposed. */
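/* For example, when the destination overlaps the source and does not start
at a lower register number, the component moves are listed in reverse
order so that no source register is clobbered before it is read. */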
23683 void
23684 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23685 {
23686 unsigned int i;
23687
23688 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23689 || REGNO (operands[0]) < REGNO (operands[1]))
23690 {
23691 for (i = 0; i < count; i++)
23692 {
23693 operands[2 * i] = dest[i];
23694 operands[2 * i + 1] = src[i];
23695 }
23696 }
23697 else
23698 {
23699 for (i = 0; i < count; i++)
23700 {
23701 operands[2 * i] = dest[count - i - 1];
23702 operands[2 * i + 1] = src[count - i - 1];
23703 }
23704 }
23705 }
23706
23707 /* Split operands into moves from op[1] + op[2] into op[0]. */
23708
23709 void
23710 neon_split_vcombine (rtx operands[3])
23711 {
23712 unsigned int dest = REGNO (operands[0]);
23713 unsigned int src1 = REGNO (operands[1]);
23714 unsigned int src2 = REGNO (operands[2]);
23715 machine_mode halfmode = GET_MODE (operands[1]);
23716 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23717 rtx destlo, desthi;
23718
23719 if (src1 == dest && src2 == dest + halfregs)
23720 {
23721 /* No-op move. Can't split to nothing; emit something. */
23722 emit_note (NOTE_INSN_DELETED);
23723 return;
23724 }
23725
23726 /* Preserve register attributes for variable tracking. */
23727 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23728 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23729 GET_MODE_SIZE (halfmode));
23730
23731 /* Special case of reversed high/low parts. Use VSWP. */
23732 if (src2 == dest && src1 == dest + halfregs)
23733 {
23734 rtx x = gen_rtx_SET (destlo, operands[1]);
23735 rtx y = gen_rtx_SET (desthi, operands[2]);
23736 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23737 return;
23738 }
23739
23740 if (!reg_overlap_mentioned_p (operands[2], destlo))
23741 {
23742 /* Try to avoid unnecessary moves if part of the result
23743 is in the right place already. */
23744 if (src1 != dest)
23745 emit_move_insn (destlo, operands[1]);
23746 if (src2 != dest + halfregs)
23747 emit_move_insn (desthi, operands[2]);
23748 }
23749 else
23750 {
23751 if (src2 != dest + halfregs)
23752 emit_move_insn (desthi, operands[2]);
23753 if (src1 != dest)
23754 emit_move_insn (destlo, operands[1]);
23755 }
23756 }
23757 \f
23758 /* Return the number (counting from 0) of
23759 the least significant set bit in MASK. */
23760
23761 inline static int
23762 number_of_first_bit_set (unsigned mask)
23763 {
23764 return ctz_hwi (mask);
23765 }
23766
23767 /* Like emit_multi_reg_push, but allowing for a different set of
23768 registers to be described as saved. MASK is the set of registers
23769 to be saved; REAL_REGS is the set of registers to be described as
23770 saved. If REAL_REGS is 0, only describe the stack adjustment. */
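/* For example, with MASK covering r4, r5 and lr, the emitted parallel
represents a push of those three registers with SP pre-decremented by 12,
while REAL_REGS only controls which saves the attached unwind note
describes. */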
23771
23772 static rtx_insn *
23773 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23774 {
23775 unsigned long regno;
23776 rtx par[10], tmp, reg;
23777 rtx_insn *insn;
23778 int i, j;
23779
23780 /* Build the parallel of the registers actually being stored. */
23781 for (i = 0; mask; ++i, mask &= mask - 1)
23782 {
23783 regno = ctz_hwi (mask);
23784 reg = gen_rtx_REG (SImode, regno);
23785
23786 if (i == 0)
23787 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23788 else
23789 tmp = gen_rtx_USE (VOIDmode, reg);
23790
23791 par[i] = tmp;
23792 }
23793
23794 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23795 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23796 tmp = gen_frame_mem (BLKmode, tmp);
23797 tmp = gen_rtx_SET (tmp, par[0]);
23798 par[0] = tmp;
23799
23800 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23801 insn = emit_insn (tmp);
23802
23803 /* Always build the stack adjustment note for unwind info. */
23804 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23805 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23806 par[0] = tmp;
23807
23808 /* Build the parallel of the registers recorded as saved for unwind. */
23809 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23810 {
23811 regno = ctz_hwi (real_regs);
23812 reg = gen_rtx_REG (SImode, regno);
23813
23814 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23815 tmp = gen_frame_mem (SImode, tmp);
23816 tmp = gen_rtx_SET (tmp, reg);
23817 RTX_FRAME_RELATED_P (tmp) = 1;
23818 par[j + 1] = tmp;
23819 }
23820
23821 if (j == 0)
23822 tmp = par[0];
23823 else
23824 {
23825 RTX_FRAME_RELATED_P (par[0]) = 1;
23826 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23827 }
23828
23829 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23830
23831 return insn;
23832 }
23833
23834 /* Emit assembly code to pop the registers in MASK from the stack.
23835 F is the assembly file. */
23836 static void
23837 thumb_pop (FILE *f, unsigned long mask)
23838 {
23839 int regno;
23840 int lo_mask = mask & 0xFF;
23841
23842 gcc_assert (mask);
23843
23844 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23845 {
23846 /* Special case. Do not generate a POP PC statement here, do it in
23847 thumb_exit() */
23848 thumb_exit (f, -1);
23849 return;
23850 }
23851
23852 fprintf (f, "\tpop\t{");
23853
23854 /* Look at the low registers first. */
23855 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23856 {
23857 if (lo_mask & 1)
23858 {
23859 asm_fprintf (f, "%r", regno);
23860
23861 if ((lo_mask & ~1) != 0)
23862 fprintf (f, ", ");
23863 }
23864 }
23865
23866 if (mask & (1 << PC_REGNUM))
23867 {
23868 /* Catch popping the PC. */
23869 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23870 || IS_CMSE_ENTRY (arm_current_func_type ()))
23871 {
23872 /* The PC is never popped directly; instead
23873 it is popped into r3 and then BX is used. */
23874 fprintf (f, "}\n");
23875
23876 thumb_exit (f, -1);
23877
23878 return;
23879 }
23880 else
23881 {
23882 if (mask & 0xFF)
23883 fprintf (f, ", ");
23884
23885 asm_fprintf (f, "%r", PC_REGNUM);
23886 }
23887 }
23888
23889 fprintf (f, "}\n");
23890 }
23891
23892 /* Generate code to return from a thumb function.
23893 If 'reg_containing_return_addr' is -1, then the return address is
23894 actually on the stack, at the stack pointer. */
23895 static void
23896 thumb_exit (FILE *f, int reg_containing_return_addr)
23897 {
23898 unsigned regs_available_for_popping;
23899 unsigned regs_to_pop;
23900 int pops_needed;
23901 unsigned available;
23902 unsigned required;
23903 machine_mode mode;
23904 int size;
23905 int restore_a4 = FALSE;
23906
23907 /* Compute the registers we need to pop. */
23908 regs_to_pop = 0;
23909 pops_needed = 0;
23910
23911 if (reg_containing_return_addr == -1)
23912 {
23913 regs_to_pop |= 1 << LR_REGNUM;
23914 ++pops_needed;
23915 }
23916
23917 if (TARGET_BACKTRACE)
23918 {
23919 /* Restore the (ARM) frame pointer and stack pointer. */
23920 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23921 pops_needed += 2;
23922 }
23923
23924 /* If there is nothing to pop then just emit the BX instruction and
23925 return. */
23926 if (pops_needed == 0)
23927 {
23928 if (crtl->calls_eh_return)
23929 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23930
23931 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23932 {
23933 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23934 reg_containing_return_addr);
23935 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23936 }
23937 else
23938 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23939 return;
23940 }
23941 /* Otherwise if we are not supporting interworking and we have not created
23942 a backtrace structure and the function was not entered in ARM mode then
23943 just pop the return address straight into the PC. */
23944 else if (!TARGET_INTERWORK
23945 && !TARGET_BACKTRACE
23946 && !is_called_in_ARM_mode (current_function_decl)
23947 && !crtl->calls_eh_return
23948 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23949 {
23950 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23951 return;
23952 }
23953
23954 /* Find out how many of the (return) argument registers we can corrupt. */
23955 regs_available_for_popping = 0;
23956
23957 /* If returning via __builtin_eh_return, the bottom three registers
23958 all contain information needed for the return. */
23959 if (crtl->calls_eh_return)
23960 size = 12;
23961 else
23962 {
23963 /* Deduce the registers used from the function's
23964 return value where possible. This is more reliable than examining
23965 df_regs_ever_live_p () because that will be set if the register is
23966 ever used in the function, not just if the register is used
23967 to hold a return value. */
23968
23969 if (crtl->return_rtx != 0)
23970 mode = GET_MODE (crtl->return_rtx);
23971 else
23972 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23973
23974 size = GET_MODE_SIZE (mode);
23975
23976 if (size == 0)
23977 {
23978 /* In a void function we can use any argument register.
23979 In a function that returns a structure on the stack
23980 we can use the second and third argument registers. */
23981 if (mode == VOIDmode)
23982 regs_available_for_popping =
23983 (1 << ARG_REGISTER (1))
23984 | (1 << ARG_REGISTER (2))
23985 | (1 << ARG_REGISTER (3));
23986 else
23987 regs_available_for_popping =
23988 (1 << ARG_REGISTER (2))
23989 | (1 << ARG_REGISTER (3));
23990 }
23991 else if (size <= 4)
23992 regs_available_for_popping =
23993 (1 << ARG_REGISTER (2))
23994 | (1 << ARG_REGISTER (3));
23995 else if (size <= 8)
23996 regs_available_for_popping =
23997 (1 << ARG_REGISTER (3));
23998 }
23999
24000 /* Match registers to be popped with registers into which we pop them. */
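/* Each loop iteration clears the lowest set bit of both masks, so, for
example, two available registers against three registers still to pop
leaves pops_needed at 1. */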
24001 for (available = regs_available_for_popping,
24002 required = regs_to_pop;
24003 required != 0 && available != 0;
24004 available &= ~(available & - available),
24005 required &= ~(required & - required))
24006 -- pops_needed;
24007
24008 /* If we have any popping registers left over, remove them. */
24009 if (available > 0)
24010 regs_available_for_popping &= ~available;
24011
24012 /* Otherwise if we need another popping register we can use
24013 the fourth argument register. */
24014 else if (pops_needed)
24015 {
24016 /* If we have not found any free argument registers and
24017 reg a4 contains the return address, we must move it. */
24018 if (regs_available_for_popping == 0
24019 && reg_containing_return_addr == LAST_ARG_REGNUM)
24020 {
24021 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24022 reg_containing_return_addr = LR_REGNUM;
24023 }
24024 else if (size > 12)
24025 {
24026 /* Register a4 is being used to hold part of the return value,
24027 but we have dire need of a free, low register. */
24028 restore_a4 = TRUE;
24029
24030 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24031 }
24032
24033 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24034 {
24035 /* The fourth argument register is available. */
24036 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24037
24038 --pops_needed;
24039 }
24040 }
24041
24042 /* Pop as many registers as we can. */
24043 thumb_pop (f, regs_available_for_popping);
24044
24045 /* Process the registers we popped. */
24046 if (reg_containing_return_addr == -1)
24047 {
24048 /* The return address was popped into the lowest numbered register. */
24049 regs_to_pop &= ~(1 << LR_REGNUM);
24050
24051 reg_containing_return_addr =
24052 number_of_first_bit_set (regs_available_for_popping);
24053
24054 /* Remove this register from the mask of available registers, so that
24055 the return address will not be corrupted by further pops. */
24056 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24057 }
24058
24059 /* If we popped other registers then handle them here. */
24060 if (regs_available_for_popping)
24061 {
24062 int frame_pointer;
24063
24064 /* Work out which register currently contains the frame pointer. */
24065 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24066
24067 /* Move it into the correct place. */
24068 asm_fprintf (f, "\tmov\t%r, %r\n",
24069 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24070
24071 /* (Temporarily) remove it from the mask of popped registers. */
24072 regs_available_for_popping &= ~(1 << frame_pointer);
24073 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24074
24075 if (regs_available_for_popping)
24076 {
24077 int stack_pointer;
24078
24079 /* We popped the stack pointer as well,
24080 find the register that contains it. */
24081 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24082
24083 /* Move it into the stack register. */
24084 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24085
24086 /* At this point we have popped all necessary registers, so
24087 do not worry about restoring regs_available_for_popping
24088 to its correct value:
24089
24090 assert (pops_needed == 0)
24091 assert (regs_available_for_popping == (1 << frame_pointer))
24092 assert (regs_to_pop == (1 << STACK_POINTER)) */
24093 }
24094 else
24095 {
24096 /* Since we have just moved the popped value into the frame
24097 pointer, the popping register is available for reuse, and
24098 we know that we still have the stack pointer left to pop. */
24099 regs_available_for_popping |= (1 << frame_pointer);
24100 }
24101 }
24102
24103 /* If we still have registers left on the stack, but we no longer have
24104 any registers into which we can pop them, then we must move the return
24105 address into the link register and make available the register that
24106 contained it. */
24107 if (regs_available_for_popping == 0 && pops_needed > 0)
24108 {
24109 regs_available_for_popping |= 1 << reg_containing_return_addr;
24110
24111 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24112 reg_containing_return_addr);
24113
24114 reg_containing_return_addr = LR_REGNUM;
24115 }
24116
24117 /* If we have registers left on the stack then pop some more.
24118 We know that at most we will want to pop FP and SP. */
24119 if (pops_needed > 0)
24120 {
24121 int popped_into;
24122 int move_to;
24123
24124 thumb_pop (f, regs_available_for_popping);
24125
24126 /* We have popped either FP or SP.
24127 Move whichever one it is into the correct register. */
24128 popped_into = number_of_first_bit_set (regs_available_for_popping);
24129 move_to = number_of_first_bit_set (regs_to_pop);
24130
24131 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24132 --pops_needed;
24133 }
24134
24135 /* If we still have not popped everything then we must have only
24136 had one register available to us and we are now popping the SP. */
24137 if (pops_needed > 0)
24138 {
24139 int popped_into;
24140
24141 thumb_pop (f, regs_available_for_popping);
24142
24143 popped_into = number_of_first_bit_set (regs_available_for_popping);
24144
24145 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24146 /*
24147 assert (regs_to_pop == (1 << STACK_POINTER))
24148 assert (pops_needed == 1)
24149 */
24150 }
24151
24152 /* If necessary restore the a4 register. */
24153 if (restore_a4)
24154 {
24155 if (reg_containing_return_addr != LR_REGNUM)
24156 {
24157 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24158 reg_containing_return_addr = LR_REGNUM;
24159 }
24160
24161 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24162 }
24163
24164 if (crtl->calls_eh_return)
24165 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24166
24167 /* Return to caller. */
24168 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24169 {
24170 /* This is for the cases where LR is not being used to contain the return
24171 address. It may therefore contain information that we might not want
24172 to leak, hence it must be cleared. The value in R0 will never be a
24173 secret at this point, so it is safe to use it, see the clearing code
24174 in 'cmse_nonsecure_entry_clear_before_return'. */
24175 if (reg_containing_return_addr != LR_REGNUM)
24176 asm_fprintf (f, "\tmov\tlr, r0\n");
24177
24178 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24179 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24180 }
24181 else
24182 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24183 }
24184 \f
24185 /* Scan INSN just before assembler is output for it.
24186 For Thumb-1, we track the status of the condition codes; this
24187 information is used in the cbranchsi4_insn pattern. */
24188 void
24189 thumb1_final_prescan_insn (rtx_insn *insn)
24190 {
24191 if (flag_print_asm_name)
24192 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24193 INSN_ADDRESSES (INSN_UID (insn)));
24194 /* Don't overwrite the previous setter when we get to a cbranch. */
24195 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24196 {
24197 enum attr_conds conds;
24198
24199 if (cfun->machine->thumb1_cc_insn)
24200 {
24201 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24202 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24203 CC_STATUS_INIT;
24204 }
24205 conds = get_attr_conds (insn);
24206 if (conds == CONDS_SET)
24207 {
24208 rtx set = single_set (insn);
24209 cfun->machine->thumb1_cc_insn = insn;
24210 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24211 cfun->machine->thumb1_cc_op1 = const0_rtx;
24212 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24213 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24214 {
24215 rtx src1 = XEXP (SET_SRC (set), 1);
24216 if (src1 == const0_rtx)
24217 cfun->machine->thumb1_cc_mode = CCmode;
24218 }
24219 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24220 {
24221 /* Record the src register operand instead of dest because
24222 cprop_hardreg pass propagates src. */
24223 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24224 }
24225 }
24226 else if (conds != CONDS_NOCOND)
24227 cfun->machine->thumb1_cc_insn = NULL_RTX;
24228 }
24229
24230 /* Check if an unexpected far jump is used. */
24231 if (cfun->machine->lr_save_eliminated
24232 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24233 internal_error("Unexpected thumb1 far jump");
24234 }
24235
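/* Return nonzero if all the set bits of VAL (taken as a 32-bit value) fit
within an 8-bit field shifted left by 0 to 24 bits; for example
0x00ff0000 (0xff << 16) qualifies, while 0x101 does not. */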
24236 int
24237 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24238 {
24239 unsigned HOST_WIDE_INT mask = 0xff;
24240 int i;
24241
24242 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24243 if (val == 0) /* XXX */
24244 return 0;
24245
24246 for (i = 0; i < 25; i++)
24247 if ((val & (mask << i)) == val)
24248 return 1;
24249
24250 return 0;
24251 }
24252
24253 /* Returns nonzero if the current function contains,
24254 or might contain, a far jump. */
24255 static int
24256 thumb_far_jump_used_p (void)
24257 {
24258 rtx_insn *insn;
24259 bool far_jump = false;
24260 unsigned int func_size = 0;
24261
24262 /* If we have already decided that far jumps may be used,
24263 do not bother checking again, and always return true even if
24264 it turns out that they are not being used. Once we have made
24265 the decision that far jumps are present (and hence that the link
24266 register will be pushed onto the stack) we cannot go back on it. */
24267 if (cfun->machine->far_jump_used)
24268 return 1;
24269
24270 /* If this function is not being called from the prologue/epilogue
24271 generation code then it must be being called from the
24272 INITIAL_ELIMINATION_OFFSET macro. */
24273 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24274 {
24275 /* In this case we know that we are being asked about the elimination
24276 of the arg pointer register. If that register is not being used,
24277 then there are no arguments on the stack, and we do not have to
24278 worry that a far jump might force the prologue to push the link
24279 register, changing the stack offsets. In this case we can just
24280 return false, since the presence of far jumps in the function will
24281 not affect stack offsets.
24282
24283 If the arg pointer is live (or if it was live, but has now been
24284 eliminated and so set to dead) then we do have to test to see if
24285 the function might contain a far jump. This test can lead to some
24286 false negatives, since before reload is completed the length of
24287 branch instructions is not known, so GCC defaults to returning their
24288 longest length, which in turn sets the far jump attribute to true.
24289
24290 A false negative will not result in bad code being generated, but it
24291 will result in a needless push and pop of the link register. We
24292 hope that this does not occur too often.
24293
24294 If we need doubleword stack alignment this could affect the other
24295 elimination offsets so we can't risk getting it wrong. */
24296 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24297 cfun->machine->arg_pointer_live = 1;
24298 else if (!cfun->machine->arg_pointer_live)
24299 return 0;
24300 }
24301
24302 /* We should not change far_jump_used during or after reload, as there is
24303 no chance to change stack frame layout. */
24304 if (reload_in_progress || reload_completed)
24305 return 0;
24306
24307 /* Check to see if the function contains a branch
24308 insn with the far jump attribute set. */
24309 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24310 {
24311 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24312 {
24313 far_jump = true;
24314 }
24315 func_size += get_attr_length (insn);
24316 }
24317
24318 /* The far_jump attribute will always be true for thumb1 before the
24319 shorten_branch pass, so checking the far_jump attribute before
24320 shorten_branch isn't very useful.
24321
24322 The following heuristic tries to estimate more accurately whether a
24323 far jump may finally be used. The heuristic is very conservative, as
24324 there is no chance to roll back a decision not to use far jumps.
24325
24326 The Thumb1 long branch offset range is -2048 to 2046. The worst case
24327 is that each 2-byte insn is associated with a 4-byte constant pool
24328 entry. Using function size 2048/3 as the threshold is conservative enough. */
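/* For example, a func_size of 700 gives 700 * 3 = 2100 >= 2048 and so is
assumed to need far jumps, whereas 600 (600 * 3 = 1800) is not. */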
24329 if (far_jump)
24330 {
24331 if ((func_size * 3) >= 2048)
24332 {
24333 /* Record the fact that we have decided that
24334 the function does use far jumps. */
24335 cfun->machine->far_jump_used = 1;
24336 return 1;
24337 }
24338 }
24339
24340 return 0;
24341 }
24342
24343 /* Return nonzero if FUNC must be entered in ARM mode. */
24344 static bool
24345 is_called_in_ARM_mode (tree func)
24346 {
24347 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24348
24349 /* Ignore the problem of functions whose address is taken. */
24350 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24351 return true;
24352
24353 #ifdef ARM_PE
24354 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24355 #else
24356 return false;
24357 #endif
24358 }
24359
24360 /* Given the stack offsets and register mask in OFFSETS, decide how
24361 many additional registers to push instead of subtracting a constant
24362 from SP. For epilogues the principle is the same except we use pop.
24363 FOR_PROLOGUE indicates which we're generating. */
24364 static int
24365 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24366 {
24367 HOST_WIDE_INT amount;
24368 unsigned long live_regs_mask = offsets->saved_regs_mask;
24369 /* Extract a mask of the ones we can give to the Thumb's push/pop
24370 instruction. */
24371 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24372 /* Then count how many other high registers will need to be pushed. */
24373 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24374 int n_free, reg_base, size;
24375
24376 if (!for_prologue && frame_pointer_needed)
24377 amount = offsets->locals_base - offsets->saved_regs;
24378 else
24379 amount = offsets->outgoing_args - offsets->saved_regs;
24380
24381 /* If the stack frame size is 512 exactly, we can save one load
24382 instruction, which should make this a win even when optimizing
24383 for speed. */
24384 if (!optimize_size && amount != 512)
24385 return 0;
24386
24387 /* Can't do this if there are high registers to push. */
24388 if (high_regs_pushed != 0)
24389 return 0;
24390
24391 /* Shouldn't do it in the prologue if no registers would normally
24392 be pushed at all. In the epilogue, also allow it if we'll have
24393 a pop insn for the PC. */
24394 if (l_mask == 0
24395 && (for_prologue
24396 || TARGET_BACKTRACE
24397 || (live_regs_mask & 1 << LR_REGNUM) == 0
24398 || TARGET_INTERWORK
24399 || crtl->args.pretend_args_size != 0))
24400 return 0;
24401
24402 /* Don't do this if thumb_expand_prologue wants to emit instructions
24403 between the push and the stack frame allocation. */
24404 if (for_prologue
24405 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24406 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24407 return 0;
24408
24409 reg_base = 0;
24410 n_free = 0;
24411 if (!for_prologue)
24412 {
24413 size = arm_size_return_regs ();
24414 reg_base = ARM_NUM_INTS (size);
24415 live_regs_mask >>= reg_base;
24416 }
24417
24418 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24419 && (for_prologue || call_used_regs[reg_base + n_free]))
24420 {
24421 live_regs_mask >>= 1;
24422 n_free++;
24423 }
24424
24425 if (n_free == 0)
24426 return 0;
24427 gcc_assert (amount / 4 * 4 == amount);
24428
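/* For example, with amount == 512 and one free register, a single extra
push brings the remaining adjustment down to 508, which fits one Thumb-1
SP subtract, so we return (512 - 508) / 4 == 1. */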
24429 if (amount >= 512 && (amount - n_free * 4) < 512)
24430 return (amount - 508) / 4;
24431 if (amount <= n_free * 4)
24432 return amount / 4;
24433 return 0;
24434 }
24435
24436 /* The bits which aren't usefully expanded as rtl. */
24437 const char *
24438 thumb1_unexpanded_epilogue (void)
24439 {
24440 arm_stack_offsets *offsets;
24441 int regno;
24442 unsigned long live_regs_mask = 0;
24443 int high_regs_pushed = 0;
24444 int extra_pop;
24445 int had_to_push_lr;
24446 int size;
24447
24448 if (cfun->machine->return_used_this_function != 0)
24449 return "";
24450
24451 if (IS_NAKED (arm_current_func_type ()))
24452 return "";
24453
24454 offsets = arm_get_frame_offsets ();
24455 live_regs_mask = offsets->saved_regs_mask;
24456 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24457
24458 /* Deduce the registers used from the function's return value where possible.
24459 This is more reliable than examining df_regs_ever_live_p () because that
24460 will be set if the register is ever used in the function, not just if
24461 the register is used to hold a return value. */
24462 size = arm_size_return_regs ();
24463
24464 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24465 if (extra_pop > 0)
24466 {
24467 unsigned long extra_mask = (1 << extra_pop) - 1;
24468 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24469 }
24470
24471 /* The prolog may have pushed some high registers to use as
24472 work registers. e.g. the testsuite file:
24473 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24474 compiles to produce:
24475 push {r4, r5, r6, r7, lr}
24476 mov r7, r9
24477 mov r6, r8
24478 push {r6, r7}
24479 as part of the prolog. We have to undo that pushing here. */
24480
24481 if (high_regs_pushed)
24482 {
24483 unsigned long mask = live_regs_mask & 0xff;
24484 int next_hi_reg;
24485
24486 /* The available low registers depend on the size of the value we are
24487 returning. */
24488 if (size <= 12)
24489 mask |= 1 << 3;
24490 if (size <= 8)
24491 mask |= 1 << 2;
24492
24493 if (mask == 0)
24494 /* Oh dear! We have no low registers into which we can pop
24495 high registers! */
24496 internal_error
24497 ("no low registers available for popping high registers");
24498
24499 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24500 if (live_regs_mask & (1 << next_hi_reg))
24501 break;
24502
24503 while (high_regs_pushed)
24504 {
24505 /* Find lo register(s) into which the high register(s) can
24506 be popped. */
24507 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24508 {
24509 if (mask & (1 << regno))
24510 high_regs_pushed--;
24511 if (high_regs_pushed == 0)
24512 break;
24513 }
24514
24515 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24516
24517 /* Pop the values into the low register(s). */
24518 thumb_pop (asm_out_file, mask);
24519
24520 /* Move the value(s) into the high registers. */
24521 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24522 {
24523 if (mask & (1 << regno))
24524 {
24525 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24526 regno);
24527
24528 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24529 if (live_regs_mask & (1 << next_hi_reg))
24530 break;
24531 }
24532 }
24533 }
24534 live_regs_mask &= ~0x0f00;
24535 }
24536
24537 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24538 live_regs_mask &= 0xff;
24539
24540 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24541 {
24542 /* Pop the return address into the PC. */
24543 if (had_to_push_lr)
24544 live_regs_mask |= 1 << PC_REGNUM;
24545
24546 /* Either no argument registers were pushed or a backtrace
24547 structure was created which includes an adjusted stack
24548 pointer, so just pop everything. */
24549 if (live_regs_mask)
24550 thumb_pop (asm_out_file, live_regs_mask);
24551
24552 /* We have either just popped the return address into the
24553 PC or it was kept in LR for the entire function.
24554 Note that thumb_pop has already called thumb_exit if the
24555 PC was in the list. */
24556 if (!had_to_push_lr)
24557 thumb_exit (asm_out_file, LR_REGNUM);
24558 }
24559 else
24560 {
24561 /* Pop everything but the return address. */
24562 if (live_regs_mask)
24563 thumb_pop (asm_out_file, live_regs_mask);
24564
24565 if (had_to_push_lr)
24566 {
24567 if (size > 12)
24568 {
24569 /* We have no free low regs, so save one. */
24570 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24571 LAST_ARG_REGNUM);
24572 }
24573
24574 /* Get the return address into a temporary register. */
24575 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24576
24577 if (size > 12)
24578 {
24579 /* Move the return address to lr. */
24580 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24581 LAST_ARG_REGNUM);
24582 /* Restore the low register. */
24583 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24584 IP_REGNUM);
24585 regno = LR_REGNUM;
24586 }
24587 else
24588 regno = LAST_ARG_REGNUM;
24589 }
24590 else
24591 regno = LR_REGNUM;
24592
24593 /* Remove the argument registers that were pushed onto the stack. */
24594 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24595 SP_REGNUM, SP_REGNUM,
24596 crtl->args.pretend_args_size);
24597
24598 thumb_exit (asm_out_file, regno);
24599 }
24600
24601 return "";
24602 }
24603
24604 /* Functions to save and restore machine-specific function data. */
24605 static struct machine_function *
24606 arm_init_machine_status (void)
24607 {
24608 struct machine_function *machine;
24609 machine = ggc_cleared_alloc<machine_function> ();
24610
24611 #if ARM_FT_UNKNOWN != 0
24612 machine->func_type = ARM_FT_UNKNOWN;
24613 #endif
24614 return machine;
24615 }
24616
24617 /* Return an RTX indicating where the return address to the
24618 calling function can be found. */
24619 rtx
24620 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24621 {
24622 if (count != 0)
24623 return NULL_RTX;
24624
24625 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24626 }
24627
24628 /* Do anything needed before RTL is emitted for each function. */
24629 void
24630 arm_init_expanders (void)
24631 {
24632 /* Arrange to initialize and mark the machine per-function status. */
24633 init_machine_status = arm_init_machine_status;
24634
24635 /* This is to stop the combine pass optimizing away the alignment
24636 adjustment of va_arg. */
24637 /* ??? It is claimed that this should not be necessary. */
24638 if (cfun)
24639 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24640 }
24641
24642 /* Return true if FUNC is compiled for a different mode (ARM vs. Thumb). */
24643
24644 bool
24645 arm_change_mode_p (tree func)
24646 {
24647 if (TREE_CODE (func) != FUNCTION_DECL)
24648 return false;
24649
24650 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24651
24652 if (!callee_tree)
24653 callee_tree = target_option_default_node;
24654
24655 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24656 int flags = callee_opts->x_target_flags;
24657
24658 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24659 }
24660
24661 /* Like arm_compute_initial_elimination_offset. Simpler because there
24662 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24663 to point at the base of the local variables after static stack
24664 space for a function has been allocated. */
24665
24666 HOST_WIDE_INT
24667 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24668 {
24669 arm_stack_offsets *offsets;
24670
24671 offsets = arm_get_frame_offsets ();
24672
24673 switch (from)
24674 {
24675 case ARG_POINTER_REGNUM:
24676 switch (to)
24677 {
24678 case STACK_POINTER_REGNUM:
24679 return offsets->outgoing_args - offsets->saved_args;
24680
24681 case FRAME_POINTER_REGNUM:
24682 return offsets->soft_frame - offsets->saved_args;
24683
24684 case ARM_HARD_FRAME_POINTER_REGNUM:
24685 return offsets->saved_regs - offsets->saved_args;
24686
24687 case THUMB_HARD_FRAME_POINTER_REGNUM:
24688 return offsets->locals_base - offsets->saved_args;
24689
24690 default:
24691 gcc_unreachable ();
24692 }
24693 break;
24694
24695 case FRAME_POINTER_REGNUM:
24696 switch (to)
24697 {
24698 case STACK_POINTER_REGNUM:
24699 return offsets->outgoing_args - offsets->soft_frame;
24700
24701 case ARM_HARD_FRAME_POINTER_REGNUM:
24702 return offsets->saved_regs - offsets->soft_frame;
24703
24704 case THUMB_HARD_FRAME_POINTER_REGNUM:
24705 return offsets->locals_base - offsets->soft_frame;
24706
24707 default:
24708 gcc_unreachable ();
24709 }
24710 break;
24711
24712 default:
24713 gcc_unreachable ();
24714 }
24715 }
24716
24717 /* Generate the function's prologue. */
24718
24719 void
24720 thumb1_expand_prologue (void)
24721 {
24722 rtx_insn *insn;
24723
24724 HOST_WIDE_INT amount;
24725 HOST_WIDE_INT size;
24726 arm_stack_offsets *offsets;
24727 unsigned long func_type;
24728 int regno;
24729 unsigned long live_regs_mask;
24730 unsigned long l_mask;
24731 unsigned high_regs_pushed = 0;
24732 bool lr_needs_saving;
24733
24734 func_type = arm_current_func_type ();
24735
24736 /* Naked functions don't have prologues. */
24737 if (IS_NAKED (func_type))
24738 {
24739 if (flag_stack_usage_info)
24740 current_function_static_stack_size = 0;
24741 return;
24742 }
24743
24744 if (IS_INTERRUPT (func_type))
24745 {
24746 error ("interrupt Service Routines cannot be coded in Thumb mode");
24747 return;
24748 }
24749
24750 if (is_called_in_ARM_mode (current_function_decl))
24751 emit_insn (gen_prologue_thumb1_interwork ());
24752
24753 offsets = arm_get_frame_offsets ();
24754 live_regs_mask = offsets->saved_regs_mask;
24755 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24756
24757 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24758 l_mask = live_regs_mask & 0x40ff;
24759 /* Then count how many other high registers will need to be pushed. */
24760 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24761
24762 if (crtl->args.pretend_args_size)
24763 {
24764 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24765
24766 if (cfun->machine->uses_anonymous_args)
24767 {
24768 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24769 unsigned long mask;
24770
24771 mask = 1ul << (LAST_ARG_REGNUM + 1);
24772 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24773
24774 insn = thumb1_emit_multi_reg_push (mask, 0);
24775 }
24776 else
24777 {
24778 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24779 stack_pointer_rtx, x));
24780 }
24781 RTX_FRAME_RELATED_P (insn) = 1;
24782 }
24783
24784 if (TARGET_BACKTRACE)
24785 {
24786 HOST_WIDE_INT offset = 0;
24787 unsigned work_register;
24788 rtx work_reg, x, arm_hfp_rtx;
24789
24790 /* We have been asked to create a stack backtrace structure.
24791 The code looks like this:
24792
24793 0 .align 2
24794 0 func:
24795 0 sub SP, #16 Reserve space for 4 registers.
24796 2 push {R7} Push low registers.
24797 4 add R7, SP, #20 Get the stack pointer before the push.
24798 6 str R7, [SP, #8] Store the stack pointer
24799 (before reserving the space).
24800 8 mov R7, PC Get hold of the start of this code + 12.
24801 10 str R7, [SP, #16] Store it.
24802 12 mov R7, FP Get hold of the current frame pointer.
24803 14 str R7, [SP, #4] Store it.
24804 16 mov R7, LR Get hold of the current return address.
24805 18 str R7, [SP, #12] Store it.
24806 20 add R7, SP, #16 Point at the start of the
24807 backtrace structure.
24808 22 mov FP, R7 Put this value into the frame pointer. */
24809
24810 work_register = thumb_find_work_register (live_regs_mask);
24811 work_reg = gen_rtx_REG (SImode, work_register);
24812 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24813
24814 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24815 stack_pointer_rtx, GEN_INT (-16)));
24816 RTX_FRAME_RELATED_P (insn) = 1;
24817
24818 if (l_mask)
24819 {
24820 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24821 RTX_FRAME_RELATED_P (insn) = 1;
24822 lr_needs_saving = false;
24823
24824 offset = bit_count (l_mask) * UNITS_PER_WORD;
24825 }
24826
24827 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24828 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24829
24830 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24831 x = gen_frame_mem (SImode, x);
24832 emit_move_insn (x, work_reg);
24833
24834 /* Make sure that the instruction fetching the PC is in the right place
24835 to calculate "start of backtrace creation code + 12". */
24836 /* ??? The stores using the common WORK_REG ought to be enough to
24837 prevent the scheduler from doing anything weird. Failing that
24838 we could always move all of the following into an UNSPEC_VOLATILE. */
24839 if (l_mask)
24840 {
24841 x = gen_rtx_REG (SImode, PC_REGNUM);
24842 emit_move_insn (work_reg, x);
24843
24844 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24845 x = gen_frame_mem (SImode, x);
24846 emit_move_insn (x, work_reg);
24847
24848 emit_move_insn (work_reg, arm_hfp_rtx);
24849
24850 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24851 x = gen_frame_mem (SImode, x);
24852 emit_move_insn (x, work_reg);
24853 }
24854 else
24855 {
24856 emit_move_insn (work_reg, arm_hfp_rtx);
24857
24858 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24859 x = gen_frame_mem (SImode, x);
24860 emit_move_insn (x, work_reg);
24861
24862 x = gen_rtx_REG (SImode, PC_REGNUM);
24863 emit_move_insn (work_reg, x);
24864
24865 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24866 x = gen_frame_mem (SImode, x);
24867 emit_move_insn (x, work_reg);
24868 }
24869
24870 x = gen_rtx_REG (SImode, LR_REGNUM);
24871 emit_move_insn (work_reg, x);
24872
24873 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24874 x = gen_frame_mem (SImode, x);
24875 emit_move_insn (x, work_reg);
24876
24877 x = GEN_INT (offset + 12);
24878 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24879
24880 emit_move_insn (arm_hfp_rtx, work_reg);
24881 }
24882 /* Optimization: If we are not pushing any low registers but we are going
24883 to push some high registers then delay our first push. This will just
24884 be a push of LR and we can combine it with the push of the first high
24885 register. */
24886 else if ((l_mask & 0xff) != 0
24887 || (high_regs_pushed == 0 && lr_needs_saving))
24888 {
24889 unsigned long mask = l_mask;
24890 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24891 insn = thumb1_emit_multi_reg_push (mask, mask);
24892 RTX_FRAME_RELATED_P (insn) = 1;
24893 lr_needs_saving = false;
24894 }
24895
24896 if (high_regs_pushed)
24897 {
24898 unsigned pushable_regs;
24899 unsigned next_hi_reg;
24900 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24901 : crtl->args.info.nregs;
24902 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24903
24904 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24905 if (live_regs_mask & (1 << next_hi_reg))
24906 break;
24907
24908 /* Here we need to mask out registers used for passing arguments,
24909 even if they could be pushed. This avoids using them to stash the
24910 high registers, which would clobber the arguments they hold. */
24911 pushable_regs = l_mask & (~arg_regs_mask);
24912 if (lr_needs_saving)
24913 pushable_regs &= ~(1 << LR_REGNUM);
24914
24915 if (pushable_regs == 0)
24916 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24917
24918 while (high_regs_pushed > 0)
24919 {
24920 unsigned long real_regs_mask = 0;
24921 unsigned long push_mask = 0;
24922
24923 for (regno = LR_REGNUM; regno >= 0; regno --)
24924 {
24925 if (pushable_regs & (1 << regno))
24926 {
24927 emit_move_insn (gen_rtx_REG (SImode, regno),
24928 gen_rtx_REG (SImode, next_hi_reg));
24929
24930 high_regs_pushed --;
24931 real_regs_mask |= (1 << next_hi_reg);
24932 push_mask |= (1 << regno);
24933
24934 if (high_regs_pushed)
24935 {
24936 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24937 next_hi_reg --)
24938 if (live_regs_mask & (1 << next_hi_reg))
24939 break;
24940 }
24941 else
24942 break;
24943 }
24944 }
24945
24946 /* If we had to find a work register and we have not yet
24947 saved the LR then add it to the list of regs to push. */
24948 if (lr_needs_saving)
24949 {
24950 push_mask |= 1 << LR_REGNUM;
24951 real_regs_mask |= 1 << LR_REGNUM;
24952 lr_needs_saving = false;
24953 }
24954
24955 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24956 RTX_FRAME_RELATED_P (insn) = 1;
24957 }
24958 }
24959
24960 /* Load the pic register before setting the frame pointer,
24961 so we can use r7 as a temporary work register. */
24962 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24963 arm_load_pic_register (live_regs_mask);
24964
24965 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24966 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24967 stack_pointer_rtx);
24968
24969 size = offsets->outgoing_args - offsets->saved_args;
24970 if (flag_stack_usage_info)
24971 current_function_static_stack_size = size;
24972
24973 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24974 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24975 sorry ("-fstack-check=specific for Thumb-1");
24976
24977 amount = offsets->outgoing_args - offsets->saved_regs;
24978 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24979 if (amount)
24980 {
24981 if (amount < 512)
24982 {
24983 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24984 GEN_INT (- amount)));
24985 RTX_FRAME_RELATED_P (insn) = 1;
24986 }
24987 else
24988 {
24989 rtx reg, dwarf;
24990
24991 /* The stack decrement is too big for an immediate value in a single
24992 insn. In theory we could issue multiple subtracts, but after
24993 three of them it becomes more space efficient to place the full
24994 value in the constant pool and load into a register. (Also the
24995 ARM debugger really likes to see only one stack decrement per
24996 function). So instead we look for a scratch register into which
24997 we can load the decrement, and then we subtract this from the
24998 stack pointer. Unfortunately on the thumb the only available
24999 scratch registers are the argument registers, and we cannot use
25000 these as they may hold arguments to the function. Instead we
25001 attempt to locate a call preserved register which is used by this
25002 function. If we can find one, then we know that it will have
25003 been pushed at the start of the prologue and so we can corrupt
25004 it now. */
25005 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25006 if (live_regs_mask & (1 << regno))
25007 break;
25008
25009 gcc_assert(regno <= LAST_LO_REGNUM);
25010
25011 reg = gen_rtx_REG (SImode, regno);
25012
25013 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25014
25015 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25016 stack_pointer_rtx, reg));
25017
25018 dwarf = gen_rtx_SET (stack_pointer_rtx,
25019 plus_constant (Pmode, stack_pointer_rtx,
25020 -amount));
25021 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25022 RTX_FRAME_RELATED_P (insn) = 1;
25023 }
25024 }
25025
25026 if (frame_pointer_needed)
25027 thumb_set_frame_pointer (offsets);
25028
25029 /* If we are profiling, make sure no instructions are scheduled before
25030 the call to mcount. Similarly if the user has requested no
25031 scheduling in the prolog. Similarly if we want non-call exceptions
25032 using the EABI unwinder, to prevent faulting instructions from being
25033 swapped with a stack adjustment. */
25034 if (crtl->profile || !TARGET_SCHED_PROLOG
25035 || (arm_except_unwind_info (&global_options) == UI_TARGET
25036 && cfun->can_throw_non_call_exceptions))
25037 emit_insn (gen_blockage ());
25038
25039 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25040 if (live_regs_mask & 0xff)
25041 cfun->machine->lr_save_eliminated = 0;
25042 }
25043
25044 /* Clear caller saved registers not used to pass return values and leaked
25045 condition flags before exiting a cmse_nonsecure_entry function. */
25046
25047 void
25048 cmse_nonsecure_entry_clear_before_return (void)
25049 {
25050 uint64_t to_clear_mask[2];
25051 uint32_t padding_bits_to_clear = 0;
25052 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25053 int regno, maxregno = IP_REGNUM;
25054 tree result_type;
25055 rtx result_rtl;
25056
25057 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25058 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25059
25060 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25061 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25062 to make sure the instructions used to clear them are present. */
25063 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25064 {
25065 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25066 maxregno = LAST_VFP_REGNUM;
25067
25068 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25069 to_clear_mask[0] |= float_mask;
25070
25071 float_mask = (1ULL << (maxregno - 63)) - 1;
25072 to_clear_mask[1] = float_mask;
25073
25074 /* Make sure we don't clear the two scratch registers used to clear the
25075 relevant FPSCR bits in output_return_instruction. */
25076 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25077 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25078 emit_use (gen_rtx_REG (SImode, 4));
25079 to_clear_mask[0] &= ~(1ULL << 4);
25080 }
25081
25082 /* If the user has defined registers to be caller saved, these are no longer
25083 restored by the function before returning and must thus be cleared for
25084 security purposes. */
25085 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25086 {
25087 /* We do not touch registers that can be used to pass arguments as per
25088 the AAPCS, since these should never be made callee-saved by user
25089 options. */
25090 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25091 continue;
25092 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25093 continue;
25094 if (call_used_regs[regno])
25095 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25096 }
25097
25098 /* Make sure we do not clear the registers the result is returned in. */
25099 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25100 if (!VOID_TYPE_P (result_type))
25101 {
25102 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25103
25104 /* No need to check that we return in registers, because we don't
25105 support returning on the stack yet. */
25106 to_clear_mask[0]
25107 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25108 padding_bits_to_clear_ptr);
25109 }
25110
25111 if (padding_bits_to_clear != 0)
25112 {
25113 rtx reg_rtx;
25114 /* Padding bits to clear is not 0, so we know we are returning a
25115 composite type, which only uses r0. Make sure that r1-r3 are
25116 cleared too; we will use r1 as a scratch register. */
25117 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25118
25119 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25120
25121 /* Fill the lower half of the negated padding_bits_to_clear. */
25122 emit_move_insn (reg_rtx,
25123 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25124
25125 /* Also fill the top half of the negated padding_bits_to_clear. */
25126 if (((~padding_bits_to_clear) >> 16) > 0)
25127 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25128 GEN_INT (16),
25129 GEN_INT (16)),
25130 GEN_INT ((~padding_bits_to_clear) >> 16)));
25131
25132 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25133 gen_rtx_REG (SImode, R0_REGNUM),
25134 reg_rtx));
25135 }
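  /* Worked example (an illustrative assumption, not taken from a specific
     test case): if padding_bits_to_clear were 0x0000ff00, the sequence
     above would build ~0x0000ff00 = 0xffff00ff in r1 (low half via a move,
     high half via a 16-bit insert) and then emit
         and r0, r0, r1
     so that only the padding bits of the composite return value in r0 are
     zeroed while its data bits are preserved.  */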
25136
25137 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25138 {
25139 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25140 continue;
25141
25142 if (IS_VFP_REGNUM (regno))
25143 {
25144 /* If regno is an even vfp register and its successor is also to
25145 be cleared, use vmov. */
25146 if (TARGET_VFP_DOUBLE
25147 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25148 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25149 {
25150 emit_move_insn (gen_rtx_REG (DFmode, regno),
25151 CONST1_RTX (DFmode));
25152 emit_use (gen_rtx_REG (DFmode, regno));
25153 regno++;
25154 }
25155 else
25156 {
25157 emit_move_insn (gen_rtx_REG (SFmode, regno),
25158 CONST1_RTX (SFmode));
25159 emit_use (gen_rtx_REG (SFmode, regno));
25160 }
25161 }
25162 else
25163 {
25164 if (TARGET_THUMB1)
25165 {
25166 if (regno == R0_REGNUM)
25167 emit_move_insn (gen_rtx_REG (SImode, regno),
25168 const0_rtx);
25169 else
25170 /* R0 has either been cleared before, see code above, or it
25171 holds a return value, either way it is not secret
25172 information. */
25173 emit_move_insn (gen_rtx_REG (SImode, regno),
25174 gen_rtx_REG (SImode, R0_REGNUM));
25175 emit_use (gen_rtx_REG (SImode, regno));
25176 }
25177 else
25178 {
25179 emit_move_insn (gen_rtx_REG (SImode, regno),
25180 gen_rtx_REG (SImode, LR_REGNUM));
25181 emit_use (gen_rtx_REG (SImode, regno));
25182 }
25183 }
25184 }
25185 }
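/* Sketch of the effect of the loop above (an illustration, not verbatim
   compiler output): for a cmse_nonsecure_entry function returning void on a
   hard-float Thumb-2 target, the cleared core registers are typically
   overwritten from LR (which holds no secret data) and each pair of cleared
   VFP registers is overwritten with 1.0, roughly
       mov r0, lr
       mov r1, lr
       ...
       vmov.f64 d0, #1.0
       ...
   before the return sequence also scrubs the relevant FPSCR bits.  */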
25186
25187 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25188 single POP instruction can be generated. LR should be replaced by PC. All
25189 the checks required are already done by USE_RETURN_INSN (). Hence,
25190 all we really need to check here is whether a single register or
25191 multiple registers are to be restored. */
25192 void
25193 thumb2_expand_return (bool simple_return)
25194 {
25195 int i, num_regs;
25196 unsigned long saved_regs_mask;
25197 arm_stack_offsets *offsets;
25198
25199 offsets = arm_get_frame_offsets ();
25200 saved_regs_mask = offsets->saved_regs_mask;
25201
25202 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25203 if (saved_regs_mask & (1 << i))
25204 num_regs++;
25205
25206 if (!simple_return && saved_regs_mask)
25207 {
25208 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25209 functions, or adapt the code to handle it according to the ACLE. This path
25210 should not be reachable for cmse_nonsecure_entry functions, but we assert
25211 it for now to ensure that future code changes do not silently
25212 change this behavior. */
25213 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25214 if (num_regs == 1)
25215 {
25216 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25217 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25218 rtx addr = gen_rtx_MEM (SImode,
25219 gen_rtx_POST_INC (SImode,
25220 stack_pointer_rtx));
25221 set_mem_alias_set (addr, get_frame_alias_set ());
25222 XVECEXP (par, 0, 0) = ret_rtx;
25223 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25224 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25225 emit_jump_insn (par);
25226 }
25227 else
25228 {
25229 saved_regs_mask &= ~ (1 << LR_REGNUM);
25230 saved_regs_mask |= (1 << PC_REGNUM);
25231 arm_emit_multi_reg_pop (saved_regs_mask);
25232 }
25233 }
25234 else
25235 {
25236 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25237 cmse_nonsecure_entry_clear_before_return ();
25238 emit_jump_insn (simple_return_rtx);
25239 }
25240 }
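/* For illustration (assumed typical output, not taken from a dump): when
   only one register was saved, the PARALLEL built above assembles to a
   single post-indexed load into the PC, e.g.
       ldr pc, [sp], #4
   whereas the multi-register path produces an ordinary
       pop {r4, r5, ..., pc}
   with LR's stack slot loaded into the PC.  */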
25241
25242 void
25243 thumb1_expand_epilogue (void)
25244 {
25245 HOST_WIDE_INT amount;
25246 arm_stack_offsets *offsets;
25247 int regno;
25248
25249 /* Naked functions don't have prologues. */
25250 if (IS_NAKED (arm_current_func_type ()))
25251 return;
25252
25253 offsets = arm_get_frame_offsets ();
25254 amount = offsets->outgoing_args - offsets->saved_regs;
25255
25256 if (frame_pointer_needed)
25257 {
25258 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25259 amount = offsets->locals_base - offsets->saved_regs;
25260 }
25261 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25262
25263 gcc_assert (amount >= 0);
25264 if (amount)
25265 {
25266 emit_insn (gen_blockage ());
25267
25268 if (amount < 512)
25269 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25270 GEN_INT (amount)));
25271 else
25272 {
25273 /* r3 is always free in the epilogue. */
25274 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25275
25276 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25277 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25278 }
25279 }
25280
25281 /* Emit a USE (stack_pointer_rtx), so that
25282 the stack adjustment will not be deleted. */
25283 emit_insn (gen_force_register_use (stack_pointer_rtx));
25284
25285 if (crtl->profile || !TARGET_SCHED_PROLOG)
25286 emit_insn (gen_blockage ());
25287
25288 /* Emit a clobber for each register that will be restored in the epilogue,
25289 so that flow2 will get register lifetimes correct. */
25290 for (regno = 0; regno < 13; regno++)
25291 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25292 emit_clobber (gen_rtx_REG (SImode, regno));
25293
25294 if (! df_regs_ever_live_p (LR_REGNUM))
25295 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25296
25297 /* Clear all caller-saved regs that are not used to return. */
25298 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25299 cmse_nonsecure_entry_clear_before_return ();
25300 }
25301
25302 /* Epilogue code for APCS frame. */
25303 static void
25304 arm_expand_epilogue_apcs_frame (bool really_return)
25305 {
25306 unsigned long func_type;
25307 unsigned long saved_regs_mask;
25308 int num_regs = 0;
25309 int i;
25310 int floats_from_frame = 0;
25311 arm_stack_offsets *offsets;
25312
25313 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25314 func_type = arm_current_func_type ();
25315
25316 /* Get frame offsets for ARM. */
25317 offsets = arm_get_frame_offsets ();
25318 saved_regs_mask = offsets->saved_regs_mask;
25319
25320 /* Find the offset of the floating-point save area in the frame. */
25321 floats_from_frame
25322 = (offsets->saved_args
25323 + arm_compute_static_chain_stack_bytes ()
25324 - offsets->frame);
25325
25326 /* Compute how many core registers are saved and how far away the floats are. */
25327 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25328 if (saved_regs_mask & (1 << i))
25329 {
25330 num_regs++;
25331 floats_from_frame += 4;
25332 }
25333
25334 if (TARGET_HARD_FLOAT)
25335 {
25336 int start_reg;
25337 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25338
25339 /* The offset is from IP_REGNUM. */
25340 int saved_size = arm_get_vfp_saved_size ();
25341 if (saved_size > 0)
25342 {
25343 rtx_insn *insn;
25344 floats_from_frame += saved_size;
25345 insn = emit_insn (gen_addsi3 (ip_rtx,
25346 hard_frame_pointer_rtx,
25347 GEN_INT (-floats_from_frame)));
25348 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25349 ip_rtx, hard_frame_pointer_rtx);
25350 }
25351
25352 /* Generate VFP register multi-pop. */
25353 start_reg = FIRST_VFP_REGNUM;
25354
25355 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25356 /* Look for a case where a reg does not need restoring. */
25357 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25358 && (!df_regs_ever_live_p (i + 1)
25359 || call_used_regs[i + 1]))
25360 {
25361 if (start_reg != i)
25362 arm_emit_vfp_multi_reg_pop (start_reg,
25363 (i - start_reg) / 2,
25364 gen_rtx_REG (SImode,
25365 IP_REGNUM));
25366 start_reg = i + 2;
25367 }
25368
25369 /* Restore the remaining regs that we have discovered (or possibly
25370 even all of them, if the conditional in the for loop never
25371 fired). */
25372 if (start_reg != i)
25373 arm_emit_vfp_multi_reg_pop (start_reg,
25374 (i - start_reg) / 2,
25375 gen_rtx_REG (SImode, IP_REGNUM));
25376 }
25377
25378 if (TARGET_IWMMXT)
25379 {
25380 /* The frame pointer is guaranteed to be non-double-word aligned, as
25381 it is set to the double-word-aligned old stack pointer minus 4. */
25382 rtx_insn *insn;
25383 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25384
25385 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25386 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25387 {
25388 rtx addr = gen_frame_mem (V2SImode,
25389 plus_constant (Pmode, hard_frame_pointer_rtx,
25390 - lrm_count * 4));
25391 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25392 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25393 gen_rtx_REG (V2SImode, i),
25394 NULL_RTX);
25395 lrm_count += 2;
25396 }
25397 }
25398
25399 /* saved_regs_mask should contain IP, which holds the old stack pointer
25400 saved at the time the frame was created. Since SP and IP are adjacent
25401 registers, we can restore that value directly into SP. */
25402 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25403 saved_regs_mask &= ~(1 << IP_REGNUM);
25404 saved_regs_mask |= (1 << SP_REGNUM);
25405
25406 /* There are two registers left in saved_regs_mask - LR and PC. We
25407 only need to restore LR (the return address), but to
25408 save time we can load it directly into PC, unless we need a
25409 special function exit sequence, or we are not really returning. */
25410 if (really_return
25411 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25412 && !crtl->calls_eh_return)
25413 /* Delete LR from the register mask, so that the saved LR on
25414 the stack is loaded into the PC instead. */
25415 saved_regs_mask &= ~(1 << LR_REGNUM);
25416 else
25417 saved_regs_mask &= ~(1 << PC_REGNUM);
25418
25419 num_regs = bit_count (saved_regs_mask);
25420 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25421 {
25422 rtx_insn *insn;
25423 emit_insn (gen_blockage ());
25424 /* Unwind the stack to just below the saved registers. */
25425 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25426 hard_frame_pointer_rtx,
25427 GEN_INT (- 4 * num_regs)));
25428
25429 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25430 stack_pointer_rtx, hard_frame_pointer_rtx);
25431 }
25432
25433 arm_emit_multi_reg_pop (saved_regs_mask);
25434
25435 if (IS_INTERRUPT (func_type))
25436 {
25437 /* Interrupt handlers will have pushed the
25438 IP onto the stack, so restore it now. */
25439 rtx_insn *insn;
25440 rtx addr = gen_rtx_MEM (SImode,
25441 gen_rtx_POST_INC (SImode,
25442 stack_pointer_rtx));
25443 set_mem_alias_set (addr, get_frame_alias_set ());
25444 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25445 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25446 gen_rtx_REG (SImode, IP_REGNUM),
25447 NULL_RTX);
25448 }
25449
25450 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25451 return;
25452
25453 if (crtl->calls_eh_return)
25454 emit_insn (gen_addsi3 (stack_pointer_rtx,
25455 stack_pointer_rtx,
25456 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25457
25458 if (IS_STACKALIGN (func_type))
25459 /* Restore the original stack pointer. Before prologue, the stack was
25460 realigned and the original stack pointer saved in r0. For details,
25461 see comment in arm_expand_prologue. */
25462 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25463
25464 emit_jump_insn (simple_return_rtx);
25465 }
25466
25467 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25468 function is not a sibcall. */
25469 void
25470 arm_expand_epilogue (bool really_return)
25471 {
25472 unsigned long func_type;
25473 unsigned long saved_regs_mask;
25474 int num_regs = 0;
25475 int i;
25476 int amount;
25477 arm_stack_offsets *offsets;
25478
25479 func_type = arm_current_func_type ();
25480
25481 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25482 and let output_return_instruction take care of any instruction emission. */
25483 if (IS_NAKED (func_type)
25484 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25485 {
25486 if (really_return)
25487 emit_jump_insn (simple_return_rtx);
25488 return;
25489 }
25490
25491 /* If we are throwing an exception, then we really must be doing a
25492 return, so we can't tail-call. */
25493 gcc_assert (!crtl->calls_eh_return || really_return);
25494
25495 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25496 {
25497 arm_expand_epilogue_apcs_frame (really_return);
25498 return;
25499 }
25500
25501 /* Get frame offsets for ARM. */
25502 offsets = arm_get_frame_offsets ();
25503 saved_regs_mask = offsets->saved_regs_mask;
25504 num_regs = bit_count (saved_regs_mask);
25505
25506 if (frame_pointer_needed)
25507 {
25508 rtx_insn *insn;
25509 /* Restore stack pointer if necessary. */
25510 if (TARGET_ARM)
25511 {
25512 /* In ARM mode, the frame pointer points to the first saved register.
25513 Restore the stack pointer to point to the last saved register. */
25514 amount = offsets->frame - offsets->saved_regs;
25515
25516 /* Force out any pending memory operations that reference stacked data
25517 before stack de-allocation occurs. */
25518 emit_insn (gen_blockage ());
25519 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25520 hard_frame_pointer_rtx,
25521 GEN_INT (amount)));
25522 arm_add_cfa_adjust_cfa_note (insn, amount,
25523 stack_pointer_rtx,
25524 hard_frame_pointer_rtx);
25525
25526 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25527 deleted. */
25528 emit_insn (gen_force_register_use (stack_pointer_rtx));
25529 }
25530 else
25531 {
25532 /* In Thumb-2 mode, the frame pointer points to the last saved
25533 register. */
25534 amount = offsets->locals_base - offsets->saved_regs;
25535 if (amount)
25536 {
25537 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25538 hard_frame_pointer_rtx,
25539 GEN_INT (amount)));
25540 arm_add_cfa_adjust_cfa_note (insn, amount,
25541 hard_frame_pointer_rtx,
25542 hard_frame_pointer_rtx);
25543 }
25544
25545 /* Force out any pending memory operations that reference stacked data
25546 before stack de-allocation occurs. */
25547 emit_insn (gen_blockage ());
25548 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25549 hard_frame_pointer_rtx));
25550 arm_add_cfa_adjust_cfa_note (insn, 0,
25551 stack_pointer_rtx,
25552 hard_frame_pointer_rtx);
25553 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25554 deleted. */
25555 emit_insn (gen_force_register_use (stack_pointer_rtx));
25556 }
25557 }
25558 else
25559 {
25560 /* Pop off outgoing args and local frame to adjust stack pointer to
25561 last saved register. */
25562 amount = offsets->outgoing_args - offsets->saved_regs;
25563 if (amount)
25564 {
25565 rtx_insn *tmp;
25566 /* Force out any pending memory operations that reference stacked data
25567 before stack de-allocation occurs. */
25568 emit_insn (gen_blockage ());
25569 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25570 stack_pointer_rtx,
25571 GEN_INT (amount)));
25572 arm_add_cfa_adjust_cfa_note (tmp, amount,
25573 stack_pointer_rtx, stack_pointer_rtx);
25574 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25575 not deleted. */
25576 emit_insn (gen_force_register_use (stack_pointer_rtx));
25577 }
25578 }
25579
25580 if (TARGET_HARD_FLOAT)
25581 {
25582 /* Generate VFP register multi-pop. */
25583 int end_reg = LAST_VFP_REGNUM + 1;
25584
25585 /* Scan the registers in reverse order. We need to match
25586 any groupings made in the prologue and generate matching
25587 vldm operations. The need to match groups is because,
25588 unlike pop, vldm can only do consecutive regs. */
25589 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25590 /* Look for a case where a reg does not need restoring. */
25591 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25592 && (!df_regs_ever_live_p (i + 1)
25593 || call_used_regs[i + 1]))
25594 {
25595 /* Restore the regs discovered so far (from reg+2 to
25596 end_reg). */
25597 if (end_reg > i + 2)
25598 arm_emit_vfp_multi_reg_pop (i + 2,
25599 (end_reg - (i + 2)) / 2,
25600 stack_pointer_rtx);
25601 end_reg = i;
25602 }
25603
25604 /* Restore the remaining regs that we have discovered (or possibly
25605 even all of them, if the conditional in the for loop never
25606 fired). */
25607 if (end_reg > i + 2)
25608 arm_emit_vfp_multi_reg_pop (i + 2,
25609 (end_reg - (i + 2)) / 2,
25610 stack_pointer_rtx);
25611 }
25612
25613 if (TARGET_IWMMXT)
25614 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25615 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25616 {
25617 rtx_insn *insn;
25618 rtx addr = gen_rtx_MEM (V2SImode,
25619 gen_rtx_POST_INC (SImode,
25620 stack_pointer_rtx));
25621 set_mem_alias_set (addr, get_frame_alias_set ());
25622 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25623 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25624 gen_rtx_REG (V2SImode, i),
25625 NULL_RTX);
25626 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25627 stack_pointer_rtx, stack_pointer_rtx);
25628 }
25629
25630 if (saved_regs_mask)
25631 {
25632 rtx insn;
25633 bool return_in_pc = false;
25634
25635 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25636 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25637 && !IS_CMSE_ENTRY (func_type)
25638 && !IS_STACKALIGN (func_type)
25639 && really_return
25640 && crtl->args.pretend_args_size == 0
25641 && saved_regs_mask & (1 << LR_REGNUM)
25642 && !crtl->calls_eh_return)
25643 {
25644 saved_regs_mask &= ~(1 << LR_REGNUM);
25645 saved_regs_mask |= (1 << PC_REGNUM);
25646 return_in_pc = true;
25647 }
25648
25649 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25650 {
25651 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25652 if (saved_regs_mask & (1 << i))
25653 {
25654 rtx addr = gen_rtx_MEM (SImode,
25655 gen_rtx_POST_INC (SImode,
25656 stack_pointer_rtx));
25657 set_mem_alias_set (addr, get_frame_alias_set ());
25658
25659 if (i == PC_REGNUM)
25660 {
25661 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25662 XVECEXP (insn, 0, 0) = ret_rtx;
25663 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25664 addr);
25665 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25666 insn = emit_jump_insn (insn);
25667 }
25668 else
25669 {
25670 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25671 addr));
25672 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25673 gen_rtx_REG (SImode, i),
25674 NULL_RTX);
25675 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25676 stack_pointer_rtx,
25677 stack_pointer_rtx);
25678 }
25679 }
25680 }
25681 else
25682 {
25683 if (TARGET_LDRD
25684 && current_tune->prefer_ldrd_strd
25685 && !optimize_function_for_size_p (cfun))
25686 {
25687 if (TARGET_THUMB2)
25688 thumb2_emit_ldrd_pop (saved_regs_mask);
25689 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25690 arm_emit_ldrd_pop (saved_regs_mask);
25691 else
25692 arm_emit_multi_reg_pop (saved_regs_mask);
25693 }
25694 else
25695 arm_emit_multi_reg_pop (saved_regs_mask);
25696 }
25697
25698 if (return_in_pc)
25699 return;
25700 }
25701
25702 amount
25703 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25704 if (amount)
25705 {
25706 int i, j;
25707 rtx dwarf = NULL_RTX;
25708 rtx_insn *tmp =
25709 emit_insn (gen_addsi3 (stack_pointer_rtx,
25710 stack_pointer_rtx,
25711 GEN_INT (amount)));
25712
25713 RTX_FRAME_RELATED_P (tmp) = 1;
25714
25715 if (cfun->machine->uses_anonymous_args)
25716 {
25717 /* Restore pretend args. See arm_expand_prologue for how the
25718 pretend args are saved on the stack. */
25719 int num_regs = crtl->args.pretend_args_size / 4;
25720 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25721 for (j = 0, i = 0; j < num_regs; i++)
25722 if (saved_regs_mask & (1 << i))
25723 {
25724 rtx reg = gen_rtx_REG (SImode, i);
25725 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25726 j++;
25727 }
25728 REG_NOTES (tmp) = dwarf;
25729 }
25730 arm_add_cfa_adjust_cfa_note (tmp, amount,
25731 stack_pointer_rtx, stack_pointer_rtx);
25732 }
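  /* Worked example of the pretend-args mask above (illustrative only):
     with crtl->args.pretend_args_size == 8, num_regs is 2 and
     (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, which are exactly the
     anonymous argument registers the prologue pushed below the saved
     registers.  */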
25733
25734 /* Clear all caller-saved regs that are not used to return. */
25735 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25736 {
25737 /* CMSE_ENTRY always returns. */
25738 gcc_assert (really_return);
25739 cmse_nonsecure_entry_clear_before_return ();
25740 }
25741
25742 if (!really_return)
25743 return;
25744
25745 if (crtl->calls_eh_return)
25746 emit_insn (gen_addsi3 (stack_pointer_rtx,
25747 stack_pointer_rtx,
25748 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25749
25750 if (IS_STACKALIGN (func_type))
25751 /* Restore the original stack pointer. Before prologue, the stack was
25752 realigned and the original stack pointer saved in r0. For details,
25753 see comment in arm_expand_prologue. */
25754 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25755
25756 emit_jump_insn (simple_return_rtx);
25757 }
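/* Example epilogue shapes produced by arm_expand_epilogue (illustrative
   assumptions about typical code, not verbatim compiler output): a simple
   ARM-mode function that saved r4-r7 and LR and needs no special exit
   sequence restores everything and returns with
       pop {r4, r5, r6, r7, pc}
   while a function that cannot return via the PC (e.g. an interworked
   return) instead pops into LR and falls through to the final
   simple_return pattern.  */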
25758
25759 /* Implementation of insn prologue_thumb1_interwork. This is the first
25760 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25761
25762 const char *
25763 thumb1_output_interwork (void)
25764 {
25765 const char * name;
25766 FILE *f = asm_out_file;
25767
25768 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25769 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25770 == SYMBOL_REF);
25771 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25772
25773 /* Generate code sequence to switch us into Thumb mode. */
25774 /* The .code 32 directive has already been emitted by
25775 ASM_DECLARE_FUNCTION_NAME. */
25776 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25777 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25778
25779 /* Generate a label, so that the debugger will notice the
25780 change in instruction sets. This label is also used by
25781 the assembler to bypass the ARM code when this function
25782 is called from a Thumb encoded function elsewhere in the
25783 same file. Hence the definition of STUB_NAME here must
25784 agree with the definition in gas/config/tc-arm.c. */
25785
25786 #define STUB_NAME ".real_start_of"
25787
25788 fprintf (f, "\t.code\t16\n");
25789 #ifdef ARM_PE
25790 if (arm_dllexport_name_p (name))
25791 name = arm_strip_name_encoding (name);
25792 #endif
25793 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25794 fprintf (f, "\t.thumb_func\n");
25795 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25796
25797 return "";
25798 }
25799
25800 /* Handle the case of a double word load into a low register from
25801 a computed memory address. The computed address may involve a
25802 register which is overwritten by the load. */
25803 const char *
25804 thumb_load_double_from_address (rtx *operands)
25805 {
25806 rtx addr;
25807 rtx base;
25808 rtx offset;
25809 rtx arg1;
25810 rtx arg2;
25811
25812 gcc_assert (REG_P (operands[0]));
25813 gcc_assert (MEM_P (operands[1]));
25814
25815 /* Get the memory address. */
25816 addr = XEXP (operands[1], 0);
25817
25818 /* Work out how the memory address is computed. */
25819 switch (GET_CODE (addr))
25820 {
25821 case REG:
25822 operands[2] = adjust_address (operands[1], SImode, 4);
25823
25824 if (REGNO (operands[0]) == REGNO (addr))
25825 {
25826 output_asm_insn ("ldr\t%H0, %2", operands);
25827 output_asm_insn ("ldr\t%0, %1", operands);
25828 }
25829 else
25830 {
25831 output_asm_insn ("ldr\t%0, %1", operands);
25832 output_asm_insn ("ldr\t%H0, %2", operands);
25833 }
25834 break;
25835
25836 case CONST:
25837 /* Compute <address> + 4 for the high order load. */
25838 operands[2] = adjust_address (operands[1], SImode, 4);
25839
25840 output_asm_insn ("ldr\t%0, %1", operands);
25841 output_asm_insn ("ldr\t%H0, %2", operands);
25842 break;
25843
25844 case PLUS:
25845 arg1 = XEXP (addr, 0);
25846 arg2 = XEXP (addr, 1);
25847
25848 if (CONSTANT_P (arg1))
25849 base = arg2, offset = arg1;
25850 else
25851 base = arg1, offset = arg2;
25852
25853 gcc_assert (REG_P (base));
25854
25855 /* Catch the case of <address> = <reg> + <reg> */
25856 if (REG_P (offset))
25857 {
25858 int reg_offset = REGNO (offset);
25859 int reg_base = REGNO (base);
25860 int reg_dest = REGNO (operands[0]);
25861
25862 /* Add the base and offset registers together into the
25863 higher destination register. */
25864 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25865 reg_dest + 1, reg_base, reg_offset);
25866
25867 /* Load the lower destination register from the address in
25868 the higher destination register. */
25869 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25870 reg_dest, reg_dest + 1);
25871
25872 /* Load the higher destination register from its own address
25873 plus 4. */
25874 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25875 reg_dest + 1, reg_dest + 1);
25876 }
25877 else
25878 {
25879 /* Compute <address> + 4 for the high order load. */
25880 operands[2] = adjust_address (operands[1], SImode, 4);
25881
25882 /* If the computed address is held in the low order register
25883 then load the high order register first, otherwise always
25884 load the low order register first. */
25885 if (REGNO (operands[0]) == REGNO (base))
25886 {
25887 output_asm_insn ("ldr\t%H0, %2", operands);
25888 output_asm_insn ("ldr\t%0, %1", operands);
25889 }
25890 else
25891 {
25892 output_asm_insn ("ldr\t%0, %1", operands);
25893 output_asm_insn ("ldr\t%H0, %2", operands);
25894 }
25895 }
25896 break;
25897
25898 case LABEL_REF:
25899 /* With no registers to worry about we can just load the value
25900 directly. */
25901 operands[2] = adjust_address (operands[1], SImode, 4);
25902
25903 output_asm_insn ("ldr\t%H0, %2", operands);
25904 output_asm_insn ("ldr\t%0, %1", operands);
25905 break;
25906
25907 default:
25908 gcc_unreachable ();
25909 }
25910
25911 return "";
25912 }
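/* Example of the overlap handling above (an illustration; the register
   numbers are assumed): for a doubleword load where operands[0] is r2 and
   the address is held in r2 itself, the high word is loaded first so the
   base is not clobbered prematurely:
       ldr r3, [r2, #4]
       ldr r2, [r2]
   If the destination and base differ, the low word is loaded first.  */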
25913
25914 const char *
25915 thumb_output_move_mem_multiple (int n, rtx *operands)
25916 {
25917 switch (n)
25918 {
25919 case 2:
25920 if (REGNO (operands[4]) > REGNO (operands[5]))
25921 std::swap (operands[4], operands[5]);
25922
25923 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25924 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25925 break;
25926
25927 case 3:
25928 if (REGNO (operands[4]) > REGNO (operands[5]))
25929 std::swap (operands[4], operands[5]);
25930 if (REGNO (operands[5]) > REGNO (operands[6]))
25931 std::swap (operands[5], operands[6]);
25932 if (REGNO (operands[4]) > REGNO (operands[5]))
25933 std::swap (operands[4], operands[5]);
25934
25935 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25936 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25937 break;
25938
25939 default:
25940 gcc_unreachable ();
25941 }
25942
25943 return "";
25944 }
25945
25946 /* Output a call-via instruction for Thumb state. */
25947 const char *
25948 thumb_call_via_reg (rtx reg)
25949 {
25950 int regno = REGNO (reg);
25951 rtx *labelp;
25952
25953 gcc_assert (regno < LR_REGNUM);
25954
25955 /* If we are in the normal text section we can use a single instance
25956 per compilation unit. If we are doing function sections, then we need
25957 an entry per section, since we can't rely on reachability. */
25958 if (in_section == text_section)
25959 {
25960 thumb_call_reg_needed = 1;
25961
25962 if (thumb_call_via_label[regno] == NULL)
25963 thumb_call_via_label[regno] = gen_label_rtx ();
25964 labelp = thumb_call_via_label + regno;
25965 }
25966 else
25967 {
25968 if (cfun->machine->call_via[regno] == NULL)
25969 cfun->machine->call_via[regno] = gen_label_rtx ();
25970 labelp = cfun->machine->call_via + regno;
25971 }
25972
25973 output_asm_insn ("bl\t%a0", labelp);
25974 return "";
25975 }
25976
25977 /* Routines for generating rtl. */
25978 void
25979 thumb_expand_movmemqi (rtx *operands)
25980 {
25981 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25982 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25983 HOST_WIDE_INT len = INTVAL (operands[2]);
25984 HOST_WIDE_INT offset = 0;
25985
25986 while (len >= 12)
25987 {
25988 emit_insn (gen_movmem12b (out, in, out, in));
25989 len -= 12;
25990 }
25991
25992 if (len >= 8)
25993 {
25994 emit_insn (gen_movmem8b (out, in, out, in));
25995 len -= 8;
25996 }
25997
25998 if (len >= 4)
25999 {
26000 rtx reg = gen_reg_rtx (SImode);
26001 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26002 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26003 len -= 4;
26004 offset += 4;
26005 }
26006
26007 if (len >= 2)
26008 {
26009 rtx reg = gen_reg_rtx (HImode);
26010 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26011 plus_constant (Pmode, in,
26012 offset))));
26013 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26014 offset)),
26015 reg));
26016 len -= 2;
26017 offset += 2;
26018 }
26019
26020 if (len)
26021 {
26022 rtx reg = gen_reg_rtx (QImode);
26023 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26024 plus_constant (Pmode, in,
26025 offset))));
26026 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26027 offset)),
26028 reg));
26029 }
26030 }
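/* A minimal sketch of the chunking strategy above, written as standalone C
   for exposition only (the helper name is hypothetical and the block is
   excluded from the build).  For len == 23 it accounts for a 12-byte block
   move, an 8-byte block move, a halfword and a byte.  */
#if 0
static void
example_copy_plan (int len)
{
  while (len >= 12)
    len -= 12;          /* one ldmia/stmia of three words */
  if (len >= 8)
    len -= 8;           /* one ldmia/stmia of two words */
  if (len >= 4)
    len -= 4;           /* one word load/store */
  if (len >= 2)
    len -= 2;           /* one halfword load/store */
  /* Any remaining byte is copied with a byte load/store.  */
}
#endif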
26031
26032 void
26033 thumb_reload_out_hi (rtx *operands)
26034 {
26035 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26036 }
26037
26038 /* Return the length of a function name prefix
26039 that starts with the character C. */
26040 static int
26041 arm_get_strip_length (int c)
26042 {
26043 switch (c)
26044 {
26045 ARM_NAME_ENCODING_LENGTHS
26046 default: return 0;
26047 }
26048 }
26049
26050 /* Return a pointer to a function's name with any
26051 and all prefix encodings stripped from it. */
26052 const char *
26053 arm_strip_name_encoding (const char *name)
26054 {
26055 int skip;
26056
26057 while ((skip = arm_get_strip_length (* name)))
26058 name += skip;
26059
26060 return name;
26061 }
26062
26063 /* If there is a '*' anywhere in the name's prefix, then
26064 emit the stripped name verbatim, otherwise prepend an
26065 underscore if leading underscores are being used. */
26066 void
26067 arm_asm_output_labelref (FILE *stream, const char *name)
26068 {
26069 int skip;
26070 int verbatim = 0;
26071
26072 while ((skip = arm_get_strip_length (* name)))
26073 {
26074 verbatim |= (*name == '*');
26075 name += skip;
26076 }
26077
26078 if (verbatim)
26079 fputs (name, stream);
26080 else
26081 asm_fprintf (stream, "%U%s", name);
26082 }
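/* Illustrative behaviour of the two routines above (assuming '*' is one of
   the prefixes handled by ARM_NAME_ENCODING_LENGTHS, as the verbatim check
   above implies): arm_strip_name_encoding ("*foo") returns "foo", and
   arm_asm_output_labelref emits "foo" verbatim rather than prepending the
   user-label prefix.  */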
26083
26084 /* This function is used to emit an EABI tag and its associated value.
26085 We emit the numerical value of the tag in case the assembler does not
26086 support textual tags (e.g. gas prior to 2.20). If requested, we include
26087 the tag name in a comment so that anyone reading the assembler output
26088 will know which tag is being set.
26089
26090 This function is not static because arm-c.c needs it too. */
26091
26092 void
26093 arm_emit_eabi_attribute (const char *name, int num, int val)
26094 {
26095 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26096 if (flag_verbose_asm || flag_debug_asm)
26097 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26098 asm_fprintf (asm_out_file, "\n");
26099 }
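/* Example (typical output, shown for illustration): with -fverbose-asm,
       arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
   emits something like
       .eabi_attribute 28, 1   @ Tag_ABI_VFP_args
   and without verbose asm only the numeric form is printed.  */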
26100
26101 /* This function is used to print CPU tuning information as a comment
26102 in the assembler file. Pointers are not printed for now. */
26103
26104 void
26105 arm_print_tune_info (void)
26106 {
26107 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26108 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26109 current_tune->constant_limit);
26110 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26111 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26112 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26113 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26114 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26115 "prefetch.l1_cache_size:\t%d\n",
26116 current_tune->prefetch.l1_cache_size);
26117 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26118 "prefetch.l1_cache_line_size:\t%d\n",
26119 current_tune->prefetch.l1_cache_line_size);
26120 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26121 "prefer_constant_pool:\t%d\n",
26122 (int) current_tune->prefer_constant_pool);
26123 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26124 "branch_cost:\t(s:speed, p:predictable)\n");
26125 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26126 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26127 current_tune->branch_cost (false, false));
26128 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26129 current_tune->branch_cost (false, true));
26130 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26131 current_tune->branch_cost (true, false));
26132 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26133 current_tune->branch_cost (true, true));
26134 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26135 "prefer_ldrd_strd:\t%d\n",
26136 (int) current_tune->prefer_ldrd_strd);
26137 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26138 "logical_op_non_short_circuit:\t[%d,%d]\n",
26139 (int) current_tune->logical_op_non_short_circuit_thumb,
26140 (int) current_tune->logical_op_non_short_circuit_arm);
26141 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26142 "prefer_neon_for_64bits:\t%d\n",
26143 (int) current_tune->prefer_neon_for_64bits);
26144 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26145 "disparage_flag_setting_t16_encodings:\t%d\n",
26146 (int) current_tune->disparage_flag_setting_t16_encodings);
26147 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26148 "string_ops_prefer_neon:\t%d\n",
26149 (int) current_tune->string_ops_prefer_neon);
26150 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26151 "max_insns_inline_memset:\t%d\n",
26152 current_tune->max_insns_inline_memset);
26153 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26154 current_tune->fusible_ops);
26155 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26156 (int) current_tune->sched_autopref);
26157 }
26158
26159 /* Print .arch and .arch_extension directives corresponding to the
26160 current architecture configuration. */
26161 static void
26162 arm_print_asm_arch_directives ()
26163 {
26164 const arch_option *arch
26165 = arm_parse_arch_option_name (all_architectures, "-march",
26166 arm_active_target.arch_name);
26167 auto_sbitmap opt_bits (isa_num_bits);
26168
26169 gcc_assert (arch);
26170
26171 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26172 if (!arch->common.extensions)
26173 return;
26174
26175 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26176 opt->name != NULL;
26177 opt++)
26178 {
26179 if (!opt->remove)
26180 {
26181 arm_initialize_isa (opt_bits, opt->isa_bits);
26182
26183 /* If every feature bit of this option is set in the target
26184 ISA specification, print out the option name. However,
26185 don't print anything if all the bits are part of the
26186 FPU specification. */
26187 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26188 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26189 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26190 }
26191 }
26192 }
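/* For example (an assumed command line, shown for illustration), compiling
   with -march=armv8-a+crc would typically emit
       .arch armv8-a
       .arch_extension crc
   while extensions whose feature bits only describe the FPU are left to the
   .fpu directive and are not printed here.  */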
26193
26194 static void
26195 arm_file_start (void)
26196 {
26197 int val;
26198
26199 if (TARGET_BPABI)
26200 {
26201 /* We don't have a specified CPU. Use the architecture to
26202 generate the tags.
26203
26204 Note: it might be better to do this unconditionally, then the
26205 assembler would not need to know about all new CPU names as
26206 they are added. */
26207 if (!arm_active_target.core_name)
26208 {
26209 /* armv7ve doesn't support any extensions. */
26210 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26211 {
26212 /* Keep backward compatibility for assemblers
26213 which don't support armv7ve. */
26214 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26215 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26216 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26217 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26218 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26219 }
26220 else
26221 arm_print_asm_arch_directives ();
26222 }
26223 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26224 asm_fprintf (asm_out_file, "\t.arch %s\n",
26225 arm_active_target.core_name + 8);
26226 else
26227 {
26228 const char* truncated_name
26229 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26230 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26231 }
26232
26233 if (print_tune_info)
26234 arm_print_tune_info ();
26235
26236 if (! TARGET_SOFT_FLOAT)
26237 {
26238 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26239 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26240
26241 if (TARGET_HARD_FLOAT_ABI)
26242 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26243 }
26244
26245 /* Some of these attributes only apply when the corresponding features
26246 are used. However we don't have any easy way of figuring this out.
26247 Conservatively record the setting that would have been used. */
26248
26249 if (flag_rounding_math)
26250 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26251
26252 if (!flag_unsafe_math_optimizations)
26253 {
26254 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26255 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26256 }
26257 if (flag_signaling_nans)
26258 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26259
26260 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26261 flag_finite_math_only ? 1 : 3);
26262
26263 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26264 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26265 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26266 flag_short_enums ? 1 : 2);
26267
26268 /* Tag_ABI_optimization_goals. */
26269 if (optimize_size)
26270 val = 4;
26271 else if (optimize >= 2)
26272 val = 2;
26273 else if (optimize)
26274 val = 1;
26275 else
26276 val = 6;
26277 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26278
26279 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26280 unaligned_access);
26281
26282 if (arm_fp16_format)
26283 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26284 (int) arm_fp16_format);
26285
26286 if (arm_lang_output_object_attributes_hook)
26287 arm_lang_output_object_attributes_hook();
26288 }
26289
26290 default_file_start ();
26291 }
26292
26293 static void
26294 arm_file_end (void)
26295 {
26296 int regno;
26297
26298 if (NEED_INDICATE_EXEC_STACK)
26299 /* Add .note.GNU-stack. */
26300 file_end_indicate_exec_stack ();
26301
26302 if (! thumb_call_reg_needed)
26303 return;
26304
26305 switch_to_section (text_section);
26306 asm_fprintf (asm_out_file, "\t.code 16\n");
26307 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26308
26309 for (regno = 0; regno < LR_REGNUM; regno++)
26310 {
26311 rtx label = thumb_call_via_label[regno];
26312
26313 if (label != 0)
26314 {
26315 targetm.asm_out.internal_label (asm_out_file, "L",
26316 CODE_LABEL_NUMBER (label));
26317 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26318 }
26319 }
26320 }
26321
26322 #ifndef ARM_PE
26323 /* Symbols in the text segment can be accessed without indirecting via the
26324 constant pool; it may take an extra binary operation, but this is still
26325 faster than indirecting via memory. Don't do this when not optimizing,
26326 since we won't be calculating all of the offsets necessary to do this
26327 simplification. */
26328
26329 static void
26330 arm_encode_section_info (tree decl, rtx rtl, int first)
26331 {
26332 if (optimize > 0 && TREE_CONSTANT (decl))
26333 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26334
26335 default_encode_section_info (decl, rtl, first);
26336 }
26337 #endif /* !ARM_PE */
26338
26339 static void
26340 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26341 {
26342 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26343 && !strcmp (prefix, "L"))
26344 {
26345 arm_ccfsm_state = 0;
26346 arm_target_insn = NULL;
26347 }
26348 default_internal_label (stream, prefix, labelno);
26349 }
26350
26351 /* Output code to add DELTA to the first argument, and then jump
26352 to FUNCTION. Used for C++ multiple inheritance. */
26353
26354 static void
26355 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26356 HOST_WIDE_INT, tree function)
26357 {
26358 static int thunk_label = 0;
26359 char label[256];
26360 char labelpc[256];
26361 int mi_delta = delta;
26362 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26363 int shift = 0;
26364 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26365 ? 1 : 0);
26366 if (mi_delta < 0)
26367 mi_delta = - mi_delta;
26368
26369 final_start_function (emit_barrier (), file, 1);
26370
26371 if (TARGET_THUMB1)
26372 {
26373 int labelno = thunk_label++;
26374 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26375 /* Thunks are entered in ARM mode when available. */
26376 if (TARGET_THUMB1_ONLY)
26377 {
26378 /* push r3 so we can use it as a temporary. */
26379 /* TODO: Omit this save if r3 is not used. */
26380 fputs ("\tpush {r3}\n", file);
26381 fputs ("\tldr\tr3, ", file);
26382 }
26383 else
26384 {
26385 fputs ("\tldr\tr12, ", file);
26386 }
26387 assemble_name (file, label);
26388 fputc ('\n', file);
26389 if (flag_pic)
26390 {
26391 /* If we are generating PIC, the ldr instruction below loads
26392 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26393 the address of the add + 8, so we have:
26394
26395 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26396 = target + 1.
26397
26398 Note that we have "+ 1" because some versions of GNU ld
26399 don't set the low bit of the result for R_ARM_REL32
26400 relocations against thumb function symbols.
26401 On ARMv6M this is +4, not +8. */
26402 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26403 assemble_name (file, labelpc);
26404 fputs (":\n", file);
26405 if (TARGET_THUMB1_ONLY)
26406 {
26407 /* This is 2 insns after the start of the thunk, so we know it
26408 is 4-byte aligned. */
26409 fputs ("\tadd\tr3, pc, r3\n", file);
26410 fputs ("\tmov r12, r3\n", file);
26411 }
26412 else
26413 fputs ("\tadd\tr12, pc, r12\n", file);
26414 }
26415 else if (TARGET_THUMB1_ONLY)
26416 fputs ("\tmov r12, r3\n", file);
26417 }
26418 if (TARGET_THUMB1_ONLY)
26419 {
26420 if (mi_delta > 255)
26421 {
26422 fputs ("\tldr\tr3, ", file);
26423 assemble_name (file, label);
26424 fputs ("+4\n", file);
26425 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26426 mi_op, this_regno, this_regno);
26427 }
26428 else if (mi_delta != 0)
26429 {
26430 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26431 when one of the operands is an immediate. */
26432 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26433 mi_op, this_regno, this_regno,
26434 mi_delta);
26435 }
26436 }
26437 else
26438 {
26439 /* TODO: Use movw/movt for large constants when available. */
26440 while (mi_delta != 0)
26441 {
26442 if ((mi_delta & (3 << shift)) == 0)
26443 shift += 2;
26444 else
26445 {
26446 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26447 mi_op, this_regno, this_regno,
26448 mi_delta & (0xff << shift));
26449 mi_delta &= ~(0xff << shift);
26450 shift += 8;
26451 }
26452 }
26453 }
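    /* Worked example of the loop above (the numbers are chosen purely for
       illustration): for mi_delta == 0x1234 with THIS in r0 it emits
           add r0, r0, #564     @ 0x234, an 8-bit field at an even shift
           add r0, r0, #4096    @ 0x1000
       i.e. the delta is applied as a series of shifter-immediate
       constants.  */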
26454 if (TARGET_THUMB1)
26455 {
26456 if (TARGET_THUMB1_ONLY)
26457 fputs ("\tpop\t{r3}\n", file);
26458
26459 fprintf (file, "\tbx\tr12\n");
26460 ASM_OUTPUT_ALIGN (file, 2);
26461 assemble_name (file, label);
26462 fputs (":\n", file);
26463 if (flag_pic)
26464 {
26465 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26466 rtx tem = XEXP (DECL_RTL (function), 0);
26467 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26468 pipeline offset is four rather than eight. Adjust the offset
26469 accordingly. */
26470 tem = plus_constant (GET_MODE (tem), tem,
26471 TARGET_THUMB1_ONLY ? -3 : -7);
26472 tem = gen_rtx_MINUS (GET_MODE (tem),
26473 tem,
26474 gen_rtx_SYMBOL_REF (Pmode,
26475 ggc_strdup (labelpc)));
26476 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26477 }
26478 else
26479 /* Output ".word .LTHUNKn". */
26480 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26481
26482 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26483 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26484 }
26485 else
26486 {
26487 fputs ("\tb\t", file);
26488 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26489 if (NEED_PLT_RELOC)
26490 fputs ("(PLT)", file);
26491 fputc ('\n', file);
26492 }
26493
26494 final_end_function ();
26495 }
26496
26497 /* MI thunk handling for TARGET_32BIT. */
26498
26499 static void
26500 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26501 HOST_WIDE_INT vcall_offset, tree function)
26502 {
26503 /* On ARM, this_regno is R0 or R1 depending on
26504 whether the function returns an aggregate or not. */
26506 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26507 function)
26508 ? R1_REGNUM : R0_REGNUM);
26509
26510 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26511 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26512 reload_completed = 1;
26513 emit_note (NOTE_INSN_PROLOGUE_END);
26514
26515 /* Add DELTA to THIS_RTX. */
26516 if (delta != 0)
26517 arm_split_constant (PLUS, Pmode, NULL_RTX,
26518 delta, this_rtx, this_rtx, false);
26519
26520 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26521 if (vcall_offset != 0)
26522 {
26523 /* Load *THIS_RTX. */
26524 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26525 /* Compute *THIS_RTX + VCALL_OFFSET. */
26526 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26527 false);
26528 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26529 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26530 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26531 }
26532
26533 /* Generate a tail call to the target function. */
26534 if (!TREE_USED (function))
26535 {
26536 assemble_external (function);
26537 TREE_USED (function) = 1;
26538 }
26539 rtx funexp = XEXP (DECL_RTL (function), 0);
26540 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26541 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26542 SIBLING_CALL_P (insn) = 1;
26543
26544 insn = get_insns ();
26545 shorten_branches (insn);
26546 final_start_function (insn, file, 1);
26547 final (insn, file, 1);
26548 final_end_function ();
26549
26550 /* Stop pretending this is a post-reload pass. */
26551 reload_completed = 0;
26552 }
26553
26554 /* Output code to add DELTA to the first argument, and then jump
26555 to FUNCTION. Used for C++ multiple inheritance. */
26556
26557 static void
26558 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26559 HOST_WIDE_INT vcall_offset, tree function)
26560 {
26561 if (TARGET_32BIT)
26562 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26563 else
26564 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26565 }
26566
26567 int
26568 arm_emit_vector_const (FILE *file, rtx x)
26569 {
26570 int i;
26571 const char * pattern;
26572
26573 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26574
26575 switch (GET_MODE (x))
26576 {
26577 case E_V2SImode: pattern = "%08x"; break;
26578 case E_V4HImode: pattern = "%04x"; break;
26579 case E_V8QImode: pattern = "%02x"; break;
26580 default: gcc_unreachable ();
26581 }
26582
26583 fprintf (file, "0x");
26584 for (i = CONST_VECTOR_NUNITS (x); i--;)
26585 {
26586 rtx element;
26587
26588 element = CONST_VECTOR_ELT (x, i);
26589 fprintf (file, pattern, INTVAL (element));
26590 }
26591
26592 return 1;
26593 }
26594
26595 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26596 HFmode constant pool entries are actually loaded with ldr. */
26597 void
26598 arm_emit_fp16_const (rtx c)
26599 {
26600 long bits;
26601
26602 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26603 if (WORDS_BIG_ENDIAN)
26604 assemble_zeros (2);
26605 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26606 if (!WORDS_BIG_ENDIAN)
26607 assemble_zeros (2);
26608 }
26609
26610 const char *
26611 arm_output_load_gr (rtx *operands)
26612 {
26613 rtx reg;
26614 rtx offset;
26615 rtx wcgr;
26616 rtx sum;
26617
26618 if (!MEM_P (operands [1])
26619 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26620 || !REG_P (reg = XEXP (sum, 0))
26621 || !CONST_INT_P (offset = XEXP (sum, 1))
26622 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26623 return "wldrw%?\t%0, %1";
26624
26625 /* Fix up an out-of-range load of a GR register. */
26626 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26627 wcgr = operands[0];
26628 operands[0] = reg;
26629 output_asm_insn ("ldr%?\t%0, %1", operands);
26630
26631 operands[0] = wcgr;
26632 operands[1] = reg;
26633 output_asm_insn ("tmcr%?\t%0, %1", operands);
26634 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26635
26636 return "";
26637 }
26638
26639 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26640
26641 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26642 named arg and all anonymous args onto the stack.
26643 XXX I know the prologue shouldn't be pushing registers, but it is faster
26644 that way. */
26645
26646 static void
26647 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26648 machine_mode mode,
26649 tree type,
26650 int *pretend_size,
26651 int second_time ATTRIBUTE_UNUSED)
26652 {
26653 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26654 int nregs;
26655
26656 cfun->machine->uses_anonymous_args = 1;
26657 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26658 {
26659 nregs = pcum->aapcs_ncrn;
26660 if (nregs & 1)
26661 {
26662 int res = arm_needs_doubleword_align (mode, type);
26663 if (res < 0 && warn_psabi)
26664 inform (input_location, "parameter passing for argument of "
26665 "type %qT changed in GCC 7.1", type);
26666 else if (res > 0)
26667 nregs++;
26668 }
26669 }
26670 else
26671 nregs = pcum->nregs;
26672
26673 if (nregs < NUM_ARG_REGS)
26674 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26675 }
26676
26677 /* We can't rely on the caller doing the proper promotion when
26678 using APCS or ATPCS. */
26679
26680 static bool
26681 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26682 {
26683 return !TARGET_AAPCS_BASED;
26684 }
26685
26686 static machine_mode
26687 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26688 machine_mode mode,
26689 int *punsignedp ATTRIBUTE_UNUSED,
26690 const_tree fntype ATTRIBUTE_UNUSED,
26691 int for_return ATTRIBUTE_UNUSED)
26692 {
26693 if (GET_MODE_CLASS (mode) == MODE_INT
26694 && GET_MODE_SIZE (mode) < 4)
26695 return SImode;
26696
26697 return mode;
26698 }
26699
26700
26701 static bool
26702 arm_default_short_enums (void)
26703 {
26704 return ARM_DEFAULT_SHORT_ENUMS;
26705 }
26706
26707
26708 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26709
26710 static bool
26711 arm_align_anon_bitfield (void)
26712 {
26713 return TARGET_AAPCS_BASED;
26714 }
26715
26716
26717 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26718
26719 static tree
26720 arm_cxx_guard_type (void)
26721 {
26722 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26723 }
26724
26725
26726 /* The EABI says test the least significant bit of a guard variable. */
26727
26728 static bool
26729 arm_cxx_guard_mask_bit (void)
26730 {
26731 return TARGET_AAPCS_BASED;
26732 }
26733
26734
26735 /* The EABI specifies that all array cookies are 8 bytes long. */
26736
26737 static tree
26738 arm_get_cookie_size (tree type)
26739 {
26740 tree size;
26741
26742 if (!TARGET_AAPCS_BASED)
26743 return default_cxx_get_cookie_size (type);
26744
26745 size = build_int_cst (sizetype, 8);
26746 return size;
26747 }
26748
26749
26750 /* The EABI says that array cookies should also contain the element size. */
26751
26752 static bool
26753 arm_cookie_has_size (void)
26754 {
26755 return TARGET_AAPCS_BASED;
26756 }
26757
26758
26759 /* The EABI says constructors and destructors should return a pointer to
26760 the object constructed/destroyed. */
26761
26762 static bool
26763 arm_cxx_cdtor_returns_this (void)
26764 {
26765 return TARGET_AAPCS_BASED;
26766 }
26767
26768 /* The EABI says that an inline function may never be the key
26769 method. */
26770
26771 static bool
26772 arm_cxx_key_method_may_be_inline (void)
26773 {
26774 return !TARGET_AAPCS_BASED;
26775 }
26776
26777 static void
26778 arm_cxx_determine_class_data_visibility (tree decl)
26779 {
26780 if (!TARGET_AAPCS_BASED
26781 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26782 return;
26783
26784 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26785 is exported. However, on systems without dynamic vague linkage,
26786 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26787 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26788 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26789 else
26790 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26791 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26792 }
26793
26794 static bool
26795 arm_cxx_class_data_always_comdat (void)
26796 {
26797 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26798 vague linkage if the class has no key function. */
26799 return !TARGET_AAPCS_BASED;
26800 }
26801
26802
26803 /* The EABI says __aeabi_atexit should be used to register static
26804 destructors. */
26805
26806 static bool
26807 arm_cxx_use_aeabi_atexit (void)
26808 {
26809 return TARGET_AAPCS_BASED;
26810 }
26811
26812
26813 void
26814 arm_set_return_address (rtx source, rtx scratch)
26815 {
26816 arm_stack_offsets *offsets;
26817 HOST_WIDE_INT delta;
26818 rtx addr;
26819 unsigned long saved_regs;
26820
26821 offsets = arm_get_frame_offsets ();
26822 saved_regs = offsets->saved_regs_mask;
26823
26824 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26825 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26826 else
26827 {
26828 if (frame_pointer_needed)
26829 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26830 else
26831 {
26832 /* LR will be the first saved register. */
26833 delta = offsets->outgoing_args - (offsets->frame + 4);
26834
26835
26836 if (delta >= 4096)
26837 {
26838 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26839 GEN_INT (delta & ~4095)));
26840 addr = scratch;
26841 delta &= 4095;
26842 }
26843 else
26844 addr = stack_pointer_rtx;
26845
26846 addr = plus_constant (Pmode, addr, delta);
26847 }
26848 /* The store needs to be marked as frame related in order to prevent
26849 DSE from deleting it as dead if it is based on fp. */
26850 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26851 RTX_FRAME_RELATED_P (insn) = 1;
26852 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26853 }
26854 }
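/* Worked example of the large-offset case above (the values are assumed for
   illustration): if the saved LR lies 5000 bytes above the stack pointer,
   delta & ~4095 == 4096 is added into SCRATCH first and the store then uses
   the remaining offset of 904, e.g.
       add r3, sp, #4096
       str r0, [r3, #904]
   assuming SOURCE is in r0 and SCRATCH is r3.  */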
26855
26856
26857 void
26858 thumb_set_return_address (rtx source, rtx scratch)
26859 {
26860 arm_stack_offsets *offsets;
26861 HOST_WIDE_INT delta;
26862 HOST_WIDE_INT limit;
26863 int reg;
26864 rtx addr;
26865 unsigned long mask;
26866
26867 emit_use (source);
26868
26869 offsets = arm_get_frame_offsets ();
26870 mask = offsets->saved_regs_mask;
26871 if (mask & (1 << LR_REGNUM))
26872 {
26873 limit = 1024;
26874 /* Find the saved regs. */
26875 if (frame_pointer_needed)
26876 {
26877 delta = offsets->soft_frame - offsets->saved_args;
26878 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26879 if (TARGET_THUMB1)
26880 limit = 128;
26881 }
26882 else
26883 {
26884 delta = offsets->outgoing_args - offsets->saved_args;
26885 reg = SP_REGNUM;
26886 }
26887 /* Allow for the stack frame. */
26888 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26889 delta -= 16;
26890 /* The link register is always the first saved register. */
26891 delta -= 4;
26892
26893 /* Construct the address. */
26894 addr = gen_rtx_REG (SImode, reg);
26895 if (delta > limit)
26896 {
26897 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26898 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26899 addr = scratch;
26900 }
26901 else
26902 addr = plus_constant (Pmode, addr, delta);
26903
26904 /* The store needs to be marked as frame related in order to prevent
26905 DSE from deleting it as dead if it is based on fp. */
26906 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26907 RTX_FRAME_RELATED_P (insn) = 1;
26908 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26909 }
26910 else
26911 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26912 }
26913
26914 /* Implements target hook vector_mode_supported_p. */
26915 bool
26916 arm_vector_mode_supported_p (machine_mode mode)
26917 {
26918 /* Neon also supports V2SImode, etc. listed in the clause below. */
26919 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26920 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26921 || mode == V2DImode || mode == V8HFmode))
26922 return true;
26923
26924 if ((TARGET_NEON || TARGET_IWMMXT)
26925 && ((mode == V2SImode)
26926 || (mode == V4HImode)
26927 || (mode == V8QImode)))
26928 return true;
26929
26930 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26931 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26932 || mode == V2HAmode))
26933 return true;
26934
26935 return false;
26936 }
26937
26938 /* Implements target hook array_mode_supported_p. */
26939
26940 static bool
26941 arm_array_mode_supported_p (machine_mode mode,
26942 unsigned HOST_WIDE_INT nelems)
26943 {
26944 if (TARGET_NEON
26945 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26946 && (nelems >= 2 && nelems <= 4))
26947 return true;
26948
26949 return false;
26950 }
26951
26952 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26953 registers when autovectorizing for Neon, at least until multiple vector
26954 widths are supported properly by the middle-end. */
26955
26956 static machine_mode
26957 arm_preferred_simd_mode (scalar_mode mode)
26958 {
26959 if (TARGET_NEON)
26960 switch (mode)
26961 {
26962 case E_SFmode:
26963 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26964 case E_SImode:
26965 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26966 case E_HImode:
26967 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26968 case E_QImode:
26969 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26970 case E_DImode:
26971 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26972 return V2DImode;
26973 break;
26974
26975 default:;
26976 }
26977
26978 if (TARGET_REALLY_IWMMXT)
26979 switch (mode)
26980 {
26981 case E_SImode:
26982 return V2SImode;
26983 case E_HImode:
26984 return V4HImode;
26985 case E_QImode:
26986 return V8QImode;
26987
26988 default:;
26989 }
26990
26991 return word_mode;
26992 }
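
/* For example (illustrative): when vectorizing SImode data with Neon
   enabled, arm_preferred_simd_mode prefers V4SImode (a 128-bit Q
   register) unless -mvectorize-with-neon-double is given, in which case
   V2SImode (a 64-bit D register) is chosen; iWMMXt targets get V2SImode
   and everything else falls back to word_mode.  */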
26993
26994 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26995
26996 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26997 using r0-r4 for function arguments, r7 for the stack frame and don't have
26998 enough left over to do doubleword arithmetic. For Thumb-2 all the
26999 potentially problematic instructions accept high registers so this is not
27000 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27001 that require many low registers. */
27002 static bool
27003 arm_class_likely_spilled_p (reg_class_t rclass)
27004 {
27005 if ((TARGET_THUMB1 && rclass == LO_REGS)
27006 || rclass == CC_REG)
27007 return true;
27008
27009 return false;
27010 }
27011
27012 /* Implements target hook small_register_classes_for_mode_p. */
27013 bool
27014 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27015 {
27016 return TARGET_THUMB1;
27017 }
27018
27019 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27020 ARM insns and therefore guarantee that the shift count is modulo 256.
27021 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27022 guarantee no particular behavior for out-of-range counts. */
27023
27024 static unsigned HOST_WIDE_INT
27025 arm_shift_truncation_mask (machine_mode mode)
27026 {
27027 return mode == SImode ? 255 : 0;
27028 }
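
/* Worked example (illustrative): a register-specified ARM shift such as
   "mov r0, r1, lsl r2" only uses the bottom byte of r2, so a count of 260
   behaves like a count of 4; hence arm_shift_truncation_mask returns 255
   for SImode.  DImode shifts go through the library routines or optabs
   mentioned above, so no mask (0) is advertised for them.  */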
27029
27030
27031 /* Map internal gcc register numbers to DWARF2 register numbers. */
27032
27033 unsigned int
27034 arm_dbx_register_number (unsigned int regno)
27035 {
27036 if (regno < 16)
27037 return regno;
27038
27039 if (IS_VFP_REGNUM (regno))
27040 {
27041 /* See comment in arm_dwarf_register_span. */
27042 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27043 return 64 + regno - FIRST_VFP_REGNUM;
27044 else
27045 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27046 }
27047
27048 if (IS_IWMMXT_GR_REGNUM (regno))
27049 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27050
27051 if (IS_IWMMXT_REGNUM (regno))
27052 return 112 + regno - FIRST_IWMMXT_REGNUM;
27053
27054 return DWARF_FRAME_REGISTERS;
27055 }
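
/* Worked example (illustrative): arm_dbx_register_number leaves the core
   registers r0-r15 with their own numbers; a VFP register with a
   single-precision view such as s0 gets the legacy DWARF number 64 + 0,
   while a D-only register such as d16 falls into the 256-287 range
   (256 + 16 = 272).  The iWMMXt control registers (wCGR) start at 104
   and the wR0-wR15 data registers at 112.  */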
27056
27057 /* DWARF models VFPv3 registers as 32 64-bit registers.
27058 GCC models them as 64 32-bit registers, so we need to describe this to
27059 the DWARF generation code. Other registers can use the default. */
27060 static rtx
27061 arm_dwarf_register_span (rtx rtl)
27062 {
27063 machine_mode mode;
27064 unsigned regno;
27065 rtx parts[16];
27066 int nregs;
27067 int i;
27068
27069 regno = REGNO (rtl);
27070 if (!IS_VFP_REGNUM (regno))
27071 return NULL_RTX;
27072
27073 /* XXX FIXME: The EABI defines two VFP register ranges:
27074 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27075 256-287: D0-D31
27076 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27077 corresponding D register. Until GDB supports this, we shall use the
27078 legacy encodings. We also use these encodings for D0-D15 for
27079 compatibility with older debuggers. */
27080 mode = GET_MODE (rtl);
27081 if (GET_MODE_SIZE (mode) < 8)
27082 return NULL_RTX;
27083
27084 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27085 {
27086 nregs = GET_MODE_SIZE (mode) / 4;
27087 for (i = 0; i < nregs; i += 2)
27088 if (TARGET_BIG_END)
27089 {
27090 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27091 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27092 }
27093 else
27094 {
27095 parts[i] = gen_rtx_REG (SImode, regno + i);
27096 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27097 }
27098 }
27099 else
27100 {
27101 nregs = GET_MODE_SIZE (mode) / 8;
27102 for (i = 0; i < nregs; i++)
27103 parts[i] = gen_rtx_REG (DImode, regno + i);
27104 }
27105
27106 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27107 }
27108
27109 #if ARM_UNWIND_INFO
27110 /* Emit unwind directives for a store-multiple instruction or stack pointer
27111 push during alignment.
27112 These should only ever be generated by the function prologue code, so
27113 expect them to have a particular form.
27114 The store-multiple instruction sometimes pushes pc as the last register,
27115 although it should not be tracked in the unwind information, or for -Os it
27116 sometimes pushes some dummy registers before the first register that needs
27117 to be tracked in the unwind information; such dummy registers are there just
27118 to avoid a separate stack adjustment, and will not be restored in the
27119 epilogue. */
27120
27121 static void
27122 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27123 {
27124 int i;
27125 HOST_WIDE_INT offset;
27126 HOST_WIDE_INT nregs;
27127 int reg_size;
27128 unsigned reg;
27129 unsigned lastreg;
27130 unsigned padfirst = 0, padlast = 0;
27131 rtx e;
27132
27133 e = XVECEXP (p, 0, 0);
27134 gcc_assert (GET_CODE (e) == SET);
27135
27136 /* First insn will adjust the stack pointer. */
27137 gcc_assert (GET_CODE (e) == SET
27138 && REG_P (SET_DEST (e))
27139 && REGNO (SET_DEST (e)) == SP_REGNUM
27140 && GET_CODE (SET_SRC (e)) == PLUS);
27141
27142 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27143 nregs = XVECLEN (p, 0) - 1;
27144 gcc_assert (nregs);
27145
27146 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27147 if (reg < 16)
27148 {
27149 /* For -Os dummy registers can be pushed at the beginning to
27150 avoid separate stack pointer adjustment. */
27151 e = XVECEXP (p, 0, 1);
27152 e = XEXP (SET_DEST (e), 0);
27153 if (GET_CODE (e) == PLUS)
27154 padfirst = INTVAL (XEXP (e, 1));
27155 gcc_assert (padfirst == 0 || optimize_size);
27156 /* The function prologue may also push pc, but not annotate it as it is
27157 never restored. We turn this into a stack pointer adjustment. */
27158 e = XVECEXP (p, 0, nregs);
27159 e = XEXP (SET_DEST (e), 0);
27160 if (GET_CODE (e) == PLUS)
27161 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27162 else
27163 padlast = offset - 4;
27164 gcc_assert (padlast == 0 || padlast == 4);
27165 if (padlast == 4)
27166 fprintf (asm_out_file, "\t.pad #4\n");
27167 reg_size = 4;
27168 fprintf (asm_out_file, "\t.save {");
27169 }
27170 else if (IS_VFP_REGNUM (reg))
27171 {
27172 reg_size = 8;
27173 fprintf (asm_out_file, "\t.vsave {");
27174 }
27175 else
27176 /* Unknown register type. */
27177 gcc_unreachable ();
27178
27179 /* If the stack increment doesn't match the size of the saved registers,
27180 something has gone horribly wrong. */
27181 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27182
27183 offset = padfirst;
27184 lastreg = 0;
27185 /* The remaining insns will describe the stores. */
27186 for (i = 1; i <= nregs; i++)
27187 {
27188 /* Expect (set (mem <addr>) (reg)).
27189 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27190 e = XVECEXP (p, 0, i);
27191 gcc_assert (GET_CODE (e) == SET
27192 && MEM_P (SET_DEST (e))
27193 && REG_P (SET_SRC (e)));
27194
27195 reg = REGNO (SET_SRC (e));
27196 gcc_assert (reg >= lastreg);
27197
27198 if (i != 1)
27199 fprintf (asm_out_file, ", ");
27200 /* We can't use %r for vfp because we need to use the
27201 double precision register names. */
27202 if (IS_VFP_REGNUM (reg))
27203 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27204 else
27205 asm_fprintf (asm_out_file, "%r", reg);
27206
27207 if (flag_checking)
27208 {
27209 /* Check that the addresses are consecutive. */
27210 e = XEXP (SET_DEST (e), 0);
27211 if (GET_CODE (e) == PLUS)
27212 gcc_assert (REG_P (XEXP (e, 0))
27213 && REGNO (XEXP (e, 0)) == SP_REGNUM
27214 && CONST_INT_P (XEXP (e, 1))
27215 && offset == INTVAL (XEXP (e, 1)));
27216 else
27217 gcc_assert (i == 1
27218 && REG_P (e)
27219 && REGNO (e) == SP_REGNUM);
27220 offset += reg_size;
27221 }
27222 }
27223 fprintf (asm_out_file, "}\n");
27224 if (padfirst)
27225 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27226 }
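
/* As an illustration, a prologue store-multiple such as
     push {r4, r5, lr}
   is described by ".save {r4, r5, lr}", a VFP save by ".vsave {d8, d9}",
   and a trailing push of pc (which is never restored) is turned into an
   extra ".pad #4" stack adjustment.  */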
27227
27228 /* Emit unwind directives for a SET. */
27229
27230 static void
27231 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27232 {
27233 rtx e0;
27234 rtx e1;
27235 unsigned reg;
27236
27237 e0 = XEXP (p, 0);
27238 e1 = XEXP (p, 1);
27239 switch (GET_CODE (e0))
27240 {
27241 case MEM:
27242 /* Pushing a single register. */
27243 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27244 || !REG_P (XEXP (XEXP (e0, 0), 0))
27245 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27246 abort ();
27247
27248 asm_fprintf (asm_out_file, "\t.save ");
27249 if (IS_VFP_REGNUM (REGNO (e1)))
27250 asm_fprintf(asm_out_file, "{d%d}\n",
27251 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27252 else
27253 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27254 break;
27255
27256 case REG:
27257 if (REGNO (e0) == SP_REGNUM)
27258 {
27259 /* A stack increment. */
27260 if (GET_CODE (e1) != PLUS
27261 || !REG_P (XEXP (e1, 0))
27262 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27263 || !CONST_INT_P (XEXP (e1, 1)))
27264 abort ();
27265
27266 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27267 -INTVAL (XEXP (e1, 1)));
27268 }
27269 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27270 {
27271 HOST_WIDE_INT offset;
27272
27273 if (GET_CODE (e1) == PLUS)
27274 {
27275 if (!REG_P (XEXP (e1, 0))
27276 || !CONST_INT_P (XEXP (e1, 1)))
27277 abort ();
27278 reg = REGNO (XEXP (e1, 0));
27279 offset = INTVAL (XEXP (e1, 1));
27280 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27281 HARD_FRAME_POINTER_REGNUM, reg,
27282 offset);
27283 }
27284 else if (REG_P (e1))
27285 {
27286 reg = REGNO (e1);
27287 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27288 HARD_FRAME_POINTER_REGNUM, reg);
27289 }
27290 else
27291 abort ();
27292 }
27293 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27294 {
27295 /* Move from sp to reg. */
27296 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27297 }
27298 else if (GET_CODE (e1) == PLUS
27299 && REG_P (XEXP (e1, 0))
27300 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27301 && CONST_INT_P (XEXP (e1, 1)))
27302 {
27303 /* Set reg to offset from sp. */
27304 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27305 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27306 }
27307 else
27308 abort ();
27309 break;
27310
27311 default:
27312 abort ();
27313 }
27314 }
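
/* Typical mappings handled here (illustrative):
     (set (mem (pre_dec sp)) (reg r4))        ->  .save {r4}
     (set sp (plus sp (const_int -16)))       ->  .pad #16
     (set fp (plus sp (const_int 8)))         ->  .setfp fp, sp, #8  */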
27315
27316
27317 /* Emit unwind directives for the given insn. */
27318
27319 static void
27320 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27321 {
27322 rtx note, pat;
27323 bool handled_one = false;
27324
27325 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27326 return;
27327
27328 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27329 && (TREE_NOTHROW (current_function_decl)
27330 || crtl->all_throwers_are_sibcalls))
27331 return;
27332
27333 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27334 return;
27335
27336 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27337 {
27338 switch (REG_NOTE_KIND (note))
27339 {
27340 case REG_FRAME_RELATED_EXPR:
27341 pat = XEXP (note, 0);
27342 goto found;
27343
27344 case REG_CFA_REGISTER:
27345 pat = XEXP (note, 0);
27346 if (pat == NULL)
27347 {
27348 pat = PATTERN (insn);
27349 if (GET_CODE (pat) == PARALLEL)
27350 pat = XVECEXP (pat, 0, 0);
27351 }
27352
27353 /* Only emitted for IS_STACKALIGN re-alignment. */
27354 {
27355 rtx dest, src;
27356 unsigned reg;
27357
27358 src = SET_SRC (pat);
27359 dest = SET_DEST (pat);
27360
27361 gcc_assert (src == stack_pointer_rtx);
27362 reg = REGNO (dest);
27363 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27364 reg + 0x90, reg);
27365 }
27366 handled_one = true;
27367 break;
27368
27369 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27370 to get correct DWARF information for shrink-wrapping.  We should not
27371 emit unwind information for it because these notes are used either for
27372 pretend arguments or to adjust sp and restore registers from the
27373 stack. */
27374 case REG_CFA_DEF_CFA:
27375 case REG_CFA_ADJUST_CFA:
27376 case REG_CFA_RESTORE:
27377 return;
27378
27379 case REG_CFA_EXPRESSION:
27380 case REG_CFA_OFFSET:
27381 /* ??? Only handling here what we actually emit. */
27382 gcc_unreachable ();
27383
27384 default:
27385 break;
27386 }
27387 }
27388 if (handled_one)
27389 return;
27390 pat = PATTERN (insn);
27391 found:
27392
27393 switch (GET_CODE (pat))
27394 {
27395 case SET:
27396 arm_unwind_emit_set (asm_out_file, pat);
27397 break;
27398
27399 case SEQUENCE:
27400 /* Store multiple. */
27401 arm_unwind_emit_sequence (asm_out_file, pat);
27402 break;
27403
27404 default:
27405 abort();
27406 }
27407 }
27408
27409
27410 /* Output a reference from a function exception table to the type_info
27411 object X. The EABI specifies that the symbol should be relocated by
27412 an R_ARM_TARGET2 relocation. */
27413
27414 static bool
27415 arm_output_ttype (rtx x)
27416 {
27417 fputs ("\t.word\t", asm_out_file);
27418 output_addr_const (asm_out_file, x);
27419 /* Use special relocations for symbol references. */
27420 if (!CONST_INT_P (x))
27421 fputs ("(TARGET2)", asm_out_file);
27422 fputc ('\n', asm_out_file);
27423
27424 return TRUE;
27425 }
27426
27427 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27428
27429 static void
27430 arm_asm_emit_except_personality (rtx personality)
27431 {
27432 fputs ("\t.personality\t", asm_out_file);
27433 output_addr_const (asm_out_file, personality);
27434 fputc ('\n', asm_out_file);
27435 }
27436 #endif /* ARM_UNWIND_INFO */
27437
27438 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27439
27440 static void
27441 arm_asm_init_sections (void)
27442 {
27443 #if ARM_UNWIND_INFO
27444 exception_section = get_unnamed_section (0, output_section_asm_op,
27445 "\t.handlerdata");
27446 #endif /* ARM_UNWIND_INFO */
27447
27448 #ifdef OBJECT_FORMAT_ELF
27449 if (target_pure_code)
27450 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27451 #endif
27452 }
27453
27454 /* Output unwind directives for the start/end of a function. */
27455
27456 void
27457 arm_output_fn_unwind (FILE * f, bool prologue)
27458 {
27459 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27460 return;
27461
27462 if (prologue)
27463 fputs ("\t.fnstart\n", f);
27464 else
27465 {
27466 /* If this function will never be unwound, then mark it as such.
27467 The same condition is used in arm_unwind_emit to suppress
27468 the frame annotations. */
27469 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27470 && (TREE_NOTHROW (current_function_decl)
27471 || crtl->all_throwers_are_sibcalls))
27472 fputs("\t.cantunwind\n", f);
27473
27474 fputs ("\t.fnend\n", f);
27475 }
27476 }
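
/* The resulting bracketing is, for example:
     .fnstart
     ...              @ body, with .save/.pad/.setfp from arm_unwind_emit
     .cantunwind      @ only if the function can never be unwound
     .fnend  */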
27477
27478 static bool
27479 arm_emit_tls_decoration (FILE *fp, rtx x)
27480 {
27481 enum tls_reloc reloc;
27482 rtx val;
27483
27484 val = XVECEXP (x, 0, 0);
27485 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27486
27487 output_addr_const (fp, val);
27488
27489 switch (reloc)
27490 {
27491 case TLS_GD32:
27492 fputs ("(tlsgd)", fp);
27493 break;
27494 case TLS_LDM32:
27495 fputs ("(tlsldm)", fp);
27496 break;
27497 case TLS_LDO32:
27498 fputs ("(tlsldo)", fp);
27499 break;
27500 case TLS_IE32:
27501 fputs ("(gottpoff)", fp);
27502 break;
27503 case TLS_LE32:
27504 fputs ("(tpoff)", fp);
27505 break;
27506 case TLS_DESCSEQ:
27507 fputs ("(tlsdesc)", fp);
27508 break;
27509 default:
27510 gcc_unreachable ();
27511 }
27512
27513 switch (reloc)
27514 {
27515 case TLS_GD32:
27516 case TLS_LDM32:
27517 case TLS_IE32:
27518 case TLS_DESCSEQ:
27519 fputs (" + (. - ", fp);
27520 output_addr_const (fp, XVECEXP (x, 0, 2));
27521 /* For DESCSEQ the third operand encodes thumbness, and is added.  */
27522 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27523 output_addr_const (fp, XVECEXP (x, 0, 3));
27524 fputc (')', fp);
27525 break;
27526 default:
27527 break;
27528 }
27529
27530 return TRUE;
27531 }
27532
27533 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27534
27535 static void
27536 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27537 {
27538 gcc_assert (size == 4);
27539 fputs ("\t.word\t", file);
27540 output_addr_const (file, x);
27541 fputs ("(tlsldo)", file);
27542 }
27543
27544 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27545
27546 static bool
27547 arm_output_addr_const_extra (FILE *fp, rtx x)
27548 {
27549 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27550 return arm_emit_tls_decoration (fp, x);
27551 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27552 {
27553 char label[256];
27554 int labelno = INTVAL (XVECEXP (x, 0, 0));
27555
27556 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27557 assemble_name_raw (fp, label);
27558
27559 return TRUE;
27560 }
27561 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27562 {
27563 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27564 if (GOT_PCREL)
27565 fputs ("+.", fp);
27566 fputs ("-(", fp);
27567 output_addr_const (fp, XVECEXP (x, 0, 0));
27568 fputc (')', fp);
27569 return TRUE;
27570 }
27571 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27572 {
27573 output_addr_const (fp, XVECEXP (x, 0, 0));
27574 if (GOT_PCREL)
27575 fputs ("+.", fp);
27576 fputs ("-(", fp);
27577 output_addr_const (fp, XVECEXP (x, 0, 1));
27578 fputc (')', fp);
27579 return TRUE;
27580 }
27581 else if (GET_CODE (x) == CONST_VECTOR)
27582 return arm_emit_vector_const (fp, x);
27583
27584 return FALSE;
27585 }
27586
27587 /* Output assembly for a shift instruction.
27588 SET_FLAGS determines how the instruction modifies the condition codes.
27589 0 - Do not set condition codes.
27590 1 - Set condition codes.
27591 2 - Use smallest instruction. */
27592 const char *
27593 arm_output_shift(rtx * operands, int set_flags)
27594 {
27595 char pattern[100];
27596 static const char flag_chars[3] = {'?', '.', '!'};
27597 const char *shift;
27598 HOST_WIDE_INT val;
27599 char c;
27600
27601 c = flag_chars[set_flags];
27602 shift = shift_op(operands[3], &val);
27603 if (shift)
27604 {
27605 if (val != -1)
27606 operands[2] = GEN_INT(val);
27607 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27608 }
27609 else
27610 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27611
27612 output_asm_insn (pattern, operands);
27613 return "";
27614 }
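
/* Example (illustrative): for an arithmetic right shift by the constant 3
   with SET_FLAGS == 1, the template becomes "asr%.\t%0, %1, %2", which is
   printed as e.g. "asrs r0, r1, #3"; with SET_FLAGS == 0 the "%?"
   modifier allows the insn to be conditionally executed instead.  */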
27615
27616 /* Output assembly for a WMMX immediate shift instruction. */
27617 const char *
27618 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27619 {
27620 int shift = INTVAL (operands[2]);
27621 char templ[50];
27622 machine_mode opmode = GET_MODE (operands[0]);
27623
27624 gcc_assert (shift >= 0);
27625
27626 /* Handle an out-of-range shift value: > 63 (for the D qualifier),
27627 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27628 if (((opmode == V4HImode) && (shift > 15))
27629 || ((opmode == V2SImode) && (shift > 31))
27630 || ((opmode == DImode) && (shift > 63)))
27631 {
27632 if (wror_or_wsra)
27633 {
27634 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27635 output_asm_insn (templ, operands);
27636 if (opmode == DImode)
27637 {
27638 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27639 output_asm_insn (templ, operands);
27640 }
27641 }
27642 else
27643 {
27644 /* The destination register will contain all zeros. */
27645 sprintf (templ, "wzero\t%%0");
27646 output_asm_insn (templ, operands);
27647 }
27648 return "";
27649 }
27650
27651 if ((opmode == DImode) && (shift > 32))
27652 {
27653 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27654 output_asm_insn (templ, operands);
27655 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27656 output_asm_insn (templ, operands);
27657 }
27658 else
27659 {
27660 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27661 output_asm_insn (templ, operands);
27662 }
27663 return "";
27664 }
27665
27666 /* Output assembly for a WMMX tinsr instruction. */
27667 const char *
27668 arm_output_iwmmxt_tinsr (rtx *operands)
27669 {
27670 int mask = INTVAL (operands[3]);
27671 int i;
27672 char templ[50];
27673 int units = mode_nunits[GET_MODE (operands[0])];
27674 gcc_assert ((mask & (mask - 1)) == 0);
27675 for (i = 0; i < units; ++i)
27676 {
27677 if ((mask & 0x01) == 1)
27678 {
27679 break;
27680 }
27681 mask >>= 1;
27682 }
27683 gcc_assert (i < units);
27684 {
27685 switch (GET_MODE (operands[0]))
27686 {
27687 case E_V8QImode:
27688 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27689 break;
27690 case E_V4HImode:
27691 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27692 break;
27693 case E_V2SImode:
27694 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27695 break;
27696 default:
27697 gcc_unreachable ();
27698 break;
27699 }
27700 output_asm_insn (templ, operands);
27701 }
27702 return "";
27703 }
27704
27705 /* Output a Thumb-1 casesi dispatch sequence. */
27706 const char *
27707 thumb1_output_casesi (rtx *operands)
27708 {
27709 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27710
27711 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27712
27713 switch (GET_MODE(diff_vec))
27714 {
27715 case E_QImode:
27716 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27717 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27718 case E_HImode:
27719 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27720 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27721 case E_SImode:
27722 return "bl\t%___gnu_thumb1_case_si";
27723 default:
27724 gcc_unreachable ();
27725 }
27726 }
27727
27728 /* Output a Thumb-2 casesi instruction. */
27729 const char *
27730 thumb2_output_casesi (rtx *operands)
27731 {
27732 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27733
27734 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27735
27736 output_asm_insn ("cmp\t%0, %1", operands);
27737 output_asm_insn ("bhi\t%l3", operands);
27738 switch (GET_MODE(diff_vec))
27739 {
27740 case E_QImode:
27741 return "tbb\t[%|pc, %0]";
27742 case E_HImode:
27743 return "tbh\t[%|pc, %0, lsl #1]";
27744 case E_SImode:
27745 if (flag_pic)
27746 {
27747 output_asm_insn ("adr\t%4, %l2", operands);
27748 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27749 output_asm_insn ("add\t%4, %4, %5", operands);
27750 return "bx\t%4";
27751 }
27752 else
27753 {
27754 output_asm_insn ("adr\t%4, %l2", operands);
27755 return "ldr\t%|pc, [%4, %0, lsl #2]";
27756 }
27757 default:
27758 gcc_unreachable ();
27759 }
27760 }
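
/* For a QImode dispatch table the emitted sequence is roughly:
     cmp   r0, r1         @ index against the table bound
     bhi   .Ldefault
     tbb   [pc, r0]
   HImode uses "tbh [pc, r0, lsl #1]", and SImode falls back to an
   adr/ldr sequence (adr/ldr/add/bx when generating PIC).  */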
27761
27762 /* Implement TARGET_SCHED_ISSUE_RATE.  Look up the issue rate in the
27763 per-core tuning structs. */
27764 static int
27765 arm_issue_rate (void)
27766 {
27767 return current_tune->issue_rate;
27768 }
27769
27770 /* Return how many instructions the scheduler should look ahead to choose
27771 the best one. */
27772 static int
27773 arm_first_cycle_multipass_dfa_lookahead (void)
27774 {
27775 int issue_rate = arm_issue_rate ();
27776
27777 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27778 }
27779
27780 /* Enable modeling of L2 auto-prefetcher. */
27781 static int
27782 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27783 {
27784 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27785 }
27786
27787 const char *
27788 arm_mangle_type (const_tree type)
27789 {
27790 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27791 has to be mangled as if it is in the "std" namespace. */
27792 if (TARGET_AAPCS_BASED
27793 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27794 return "St9__va_list";
27795
27796 /* Half-precision float. */
27797 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27798 return "Dh";
27799
27800 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27801 builtin type. */
27802 if (TYPE_NAME (type) != NULL)
27803 return arm_mangle_builtin_type (type);
27804
27805 /* Use the default mangling. */
27806 return NULL;
27807 }
27808
27809 /* Order of allocation of core registers for Thumb: this allocation is
27810 written over the corresponding initial entries of the array
27811 initialized with REG_ALLOC_ORDER. We allocate all low registers
27812 first. Saving and restoring a low register is usually cheaper than
27813 using a call-clobbered high register. */
27814
27815 static const int thumb_core_reg_alloc_order[] =
27816 {
27817 3, 2, 1, 0, 4, 5, 6, 7,
27818 12, 14, 8, 9, 10, 11
27819 };
27820
27821 /* Adjust register allocation order when compiling for Thumb. */
27822
27823 void
27824 arm_order_regs_for_local_alloc (void)
27825 {
27826 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27827 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27828 if (TARGET_THUMB)
27829 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27830 sizeof (thumb_core_reg_alloc_order));
27831 }
27832
27833 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27834
27835 bool
27836 arm_frame_pointer_required (void)
27837 {
27838 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27839 return true;
27840
27841 /* If the function receives nonlocal gotos, it needs to save the frame
27842 pointer in the nonlocal_goto_save_area object. */
27843 if (cfun->has_nonlocal_label)
27844 return true;
27845
27846 /* The frame pointer is required for non-leaf APCS frames. */
27847 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27848 return true;
27849
27850 /* If we are probing the stack in the prologue, we will have a faulting
27851 instruction prior to the stack adjustment and this requires a frame
27852 pointer if we want to catch the exception using the EABI unwinder. */
27853 if (!IS_INTERRUPT (arm_current_func_type ())
27854 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27855 && arm_except_unwind_info (&global_options) == UI_TARGET
27856 && cfun->can_throw_non_call_exceptions)
27857 {
27858 HOST_WIDE_INT size = get_frame_size ();
27859
27860 /* That's irrelevant if there is no stack adjustment. */
27861 if (size <= 0)
27862 return false;
27863
27864 /* That's relevant only if there is a stack probe. */
27865 if (crtl->is_leaf && !cfun->calls_alloca)
27866 {
27867 /* We don't have the final size of the frame so adjust. */
27868 size += 32 * UNITS_PER_WORD;
27869 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27870 return true;
27871 }
27872 else
27873 return true;
27874 }
27875
27876 return false;
27877 }
27878
27879 /* Only Thumb-1 lacks support for conditional execution, so return true
27880 if the target is not Thumb-1. */
27881 static bool
27882 arm_have_conditional_execution (void)
27883 {
27884 return !TARGET_THUMB1;
27885 }
27886
27887 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27888 static HOST_WIDE_INT
27889 arm_vector_alignment (const_tree type)
27890 {
27891 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27892
27893 if (TARGET_AAPCS_BASED)
27894 align = MIN (align, 64);
27895
27896 return align;
27897 }
27898
27899 static unsigned int
27900 arm_autovectorize_vector_sizes (void)
27901 {
27902 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27903 }
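
/* Returning 16 | 8 lets the auto-vectorizer try both 128-bit and 64-bit
   vector sizes; with -mvectorize-with-neon-double the value 0 restricts
   it to the single preferred SIMD mode chosen above.  */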
27904
27905 static bool
27906 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27907 {
27908 /* Vectors which aren't in packed structures will not be less aligned than
27909 the natural alignment of their element type, so this is safe. */
27910 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27911 return !is_packed;
27912
27913 return default_builtin_vector_alignment_reachable (type, is_packed);
27914 }
27915
27916 static bool
27917 arm_builtin_support_vector_misalignment (machine_mode mode,
27918 const_tree type, int misalignment,
27919 bool is_packed)
27920 {
27921 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27922 {
27923 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27924
27925 if (is_packed)
27926 return align == 1;
27927
27928 /* If the misalignment is unknown, we should be able to handle the access
27929 so long as it is not to a member of a packed data structure. */
27930 if (misalignment == -1)
27931 return true;
27932
27933 /* Return true if the misalignment is a multiple of the natural alignment
27934 of the vector's element type. This is probably always going to be
27935 true in practice, since we've already established that this isn't a
27936 packed access. */
27937 return ((misalignment % align) == 0);
27938 }
27939
27940 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27941 is_packed);
27942 }
27943
27944 static void
27945 arm_conditional_register_usage (void)
27946 {
27947 int regno;
27948
27949 if (TARGET_THUMB1 && optimize_size)
27950 {
27951 /* When optimizing for size on Thumb-1, it's better not
27952 to use the HI regs, because of the overhead of
27953 stacking them. */
27954 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27955 fixed_regs[regno] = call_used_regs[regno] = 1;
27956 }
27957
27958 /* The link register can be clobbered by any branch insn,
27959 but we have no way to track that at present, so mark
27960 it as unavailable. */
27961 if (TARGET_THUMB1)
27962 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27963
27964 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27965 {
27966 /* VFPv3 registers are disabled when earlier VFP
27967 versions are selected due to the definition of
27968 LAST_VFP_REGNUM. */
27969 for (regno = FIRST_VFP_REGNUM;
27970 regno <= LAST_VFP_REGNUM; ++ regno)
27971 {
27972 fixed_regs[regno] = 0;
27973 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27974 || regno >= FIRST_VFP_REGNUM + 32;
27975 }
27976 }
27977
27978 if (TARGET_REALLY_IWMMXT)
27979 {
27980 regno = FIRST_IWMMXT_GR_REGNUM;
27981 /* The 2002/10/09 revision of the XScale ABI has wCG0
27982 and wCG1 as call-preserved registers. The 2002/11/21
27983 revision changed this so that all wCG registers are
27984 scratch registers. */
27985 for (regno = FIRST_IWMMXT_GR_REGNUM;
27986 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27987 fixed_regs[regno] = 0;
27988 /* The XScale ABI has wR0 - wR9 as scratch registers,
27989 the rest as call-preserved registers. */
27990 for (regno = FIRST_IWMMXT_REGNUM;
27991 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27992 {
27993 fixed_regs[regno] = 0;
27994 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27995 }
27996 }
27997
27998 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27999 {
28000 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28001 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28002 }
28003 else if (TARGET_APCS_STACK)
28004 {
28005 fixed_regs[10] = 1;
28006 call_used_regs[10] = 1;
28007 }
28008 /* -mcaller-super-interworking reserves r11 for calls to
28009 _interwork_r11_call_via_rN(). Making the register global
28010 is an easy way of ensuring that it remains valid for all
28011 calls. */
28012 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28013 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28014 {
28015 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28016 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28017 if (TARGET_CALLER_INTERWORKING)
28018 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28019 }
28020 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28021 }
28022
28023 static reg_class_t
28024 arm_preferred_rename_class (reg_class_t rclass)
28025 {
28026 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28027 using GENERAL_REGS.  During the register rename pass we therefore prefer
28028 LO_REGS, which can reduce code size. */
28029 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28030 return LO_REGS;
28031 else
28032 return NO_REGS;
28033 }
28034
28035 /* Compute the attribute "length" of insn "*push_multi".
28036 So this function MUST be kept in sync with that insn pattern. */
28037 int
28038 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28039 {
28040 int i, regno, hi_reg;
28041 int num_saves = XVECLEN (parallel_op, 0);
28042
28043 /* ARM mode. */
28044 if (TARGET_ARM)
28045 return 4;
28046 /* Thumb1 mode. */
28047 if (TARGET_THUMB1)
28048 return 2;
28049
28050 /* Thumb2 mode. */
28051 regno = REGNO (first_op);
28052 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
28053 list fits in 8 bits.  Normally this means all registers in the list must be
28054 LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use a 32-bit
28055 encoding.  The one exception is PUSH, where LR in HI_REGS can still be used
28056 with a 16-bit encoding. */
28057 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28058 for (i = 1; i < num_saves && !hi_reg; i++)
28059 {
28060 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28061 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28062 }
28063
28064 if (!hi_reg)
28065 return 2;
28066 return 4;
28067 }
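
/* Examples (illustrative): in Thumb-2 mode "push {r0-r3, lr}" can use the
   16-bit encoding (length 2), because LR is the one high register allowed
   there, whereas "push {r4, r8}" needs the 32-bit encoding (length 4)
   since r8 is a high register other than LR.  */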
28068
28069 /* Compute the attribute "length" of an insn.  Currently, this function is used
28070 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28071 "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
28072 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28073 true if OPERANDS contains an insn which explicitly updates the base register. */
28074
28075 int
28076 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28077 {
28078 /* ARM mode. */
28079 if (TARGET_ARM)
28080 return 4;
28081 /* Thumb1 mode. */
28082 if (TARGET_THUMB1)
28083 return 2;
28084
28085 rtx parallel_op = operands[0];
28086 /* Index of the last element of the PARALLEL. */
28087 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28088 /* Initialize to the number of the base register. */
28089 unsigned regno = REGNO (operands[1]);
28090 /* Skip return and write back pattern.
28091 We only need register pop pattern for later analysis. */
28092 unsigned first_indx = 0;
28093 first_indx += return_pc ? 1 : 0;
28094 first_indx += write_back_p ? 1 : 0;
28095
28096 /* A pop operation can be done through LDM or POP.  If the base register is SP
28097 and write back is used, then LDM is an alias of POP. */
28098 bool pop_p = (regno == SP_REGNUM && write_back_p);
28099 bool ldm_p = !pop_p;
28100
28101 /* Check base register for LDM. */
28102 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28103 return 4;
28104
28105 /* Check each register in the list. */
28106 for (; indx >= first_indx; indx--)
28107 {
28108 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28109 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28110 comment in arm_attr_length_push_multi. */
28111 if (REGNO_REG_CLASS (regno) == HI_REGS
28112 && (regno != PC_REGNUM || ldm_p))
28113 return 4;
28114 }
28115
28116 return 2;
28117 }
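
/* Examples (illustrative): "pop {r4, pc}" (base SP with write back) can
   use the 16-bit encoding (length 2) since PC is allowed in a 16-bit POP,
   whereas an LDM whose base register, or any register in its list, is a
   high register needs the 32-bit encoding (length 4).  */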
28118
28119 /* Compute the number of instructions emitted by output_move_double. */
28120 int
28121 arm_count_output_move_double_insns (rtx *operands)
28122 {
28123 int count;
28124 rtx ops[2];
28125 /* output_move_double may modify the operands array, so call it
28126 here on a copy of the array. */
28127 ops[0] = operands[0];
28128 ops[1] = operands[1];
28129 output_move_double (ops, false, &count);
28130 return count;
28131 }
28132
28133 int
28134 vfp3_const_double_for_fract_bits (rtx operand)
28135 {
28136 REAL_VALUE_TYPE r0;
28137
28138 if (!CONST_DOUBLE_P (operand))
28139 return 0;
28140
28141 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28142 if (exact_real_inverse (DFmode, &r0)
28143 && !REAL_VALUE_NEGATIVE (r0))
28144 {
28145 if (exact_real_truncate (DFmode, &r0))
28146 {
28147 HOST_WIDE_INT value = real_to_integer (&r0);
28148 value = value & 0xffffffff;
28149 if ((value != 0) && ( (value & (value - 1)) == 0))
28150 {
28151 int ret = exact_log2 (value);
28152 gcc_assert (IN_RANGE (ret, 0, 31));
28153 return ret;
28154 }
28155 }
28156 }
28157 return 0;
28158 }
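
/* Worked example (illustrative): for the constant 0.125 the exact
   reciprocal is 8.0, which truncates exactly to the integer 8, a power of
   two, so vfp3_const_double_for_fract_bits returns log2 (8) = 3, the
   number of fractional bits for a fixed-point vcvt.  */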
28159
28160 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28161 log2 is in [1, 32], return that log2. Otherwise return -1.
28162 This is used in the patterns for vcvt.s32.f32 floating-point to
28163 fixed-point conversions. */
28164
28165 int
28166 vfp3_const_double_for_bits (rtx x)
28167 {
28168 const REAL_VALUE_TYPE *r;
28169
28170 if (!CONST_DOUBLE_P (x))
28171 return -1;
28172
28173 r = CONST_DOUBLE_REAL_VALUE (x);
28174
28175 if (REAL_VALUE_NEGATIVE (*r)
28176 || REAL_VALUE_ISNAN (*r)
28177 || REAL_VALUE_ISINF (*r)
28178 || !real_isinteger (r, SFmode))
28179 return -1;
28180
28181 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28182
28183 /* The exact_log2 above will have returned -1 if this is
28184 not an exact log2. */
28185 if (!IN_RANGE (hwint, 1, 32))
28186 return -1;
28187
28188 return hwint;
28189 }
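
/* Worked example (illustrative): 65536.0 is a non-negative integer and an
   exact power of two, so vfp3_const_double_for_bits returns
   log2 (65536) = 16; 3.0 (not a power of two) and 2^33 (log2 outside
   [1, 32]) both return -1.  */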
28190
28191 \f
28192 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28193
28194 static void
28195 arm_pre_atomic_barrier (enum memmodel model)
28196 {
28197 if (need_atomic_barrier_p (model, true))
28198 emit_insn (gen_memory_barrier ());
28199 }
28200
28201 static void
28202 arm_post_atomic_barrier (enum memmodel model)
28203 {
28204 if (need_atomic_barrier_p (model, false))
28205 emit_insn (gen_memory_barrier ());
28206 }
28207
28208 /* Emit the load-exclusive and store-exclusive instructions.
28209 Use acquire and release versions if necessary. */
28210
28211 static void
28212 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28213 {
28214 rtx (*gen) (rtx, rtx);
28215
28216 if (acq)
28217 {
28218 switch (mode)
28219 {
28220 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28221 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28222 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28223 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28224 default:
28225 gcc_unreachable ();
28226 }
28227 }
28228 else
28229 {
28230 switch (mode)
28231 {
28232 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28233 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28234 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28235 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28236 default:
28237 gcc_unreachable ();
28238 }
28239 }
28240
28241 emit_insn (gen (rval, mem));
28242 }
28243
28244 static void
28245 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28246 rtx mem, bool rel)
28247 {
28248 rtx (*gen) (rtx, rtx, rtx);
28249
28250 if (rel)
28251 {
28252 switch (mode)
28253 {
28254 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28255 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28256 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28257 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28258 default:
28259 gcc_unreachable ();
28260 }
28261 }
28262 else
28263 {
28264 switch (mode)
28265 {
28266 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28267 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28268 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28269 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28270 default:
28271 gcc_unreachable ();
28272 }
28273 }
28274
28275 emit_insn (gen (bval, rval, mem));
28276 }
28277
28278 /* Mark the previous jump instruction as unlikely. */
28279
28280 static void
28281 emit_unlikely_jump (rtx insn)
28282 {
28283 rtx_insn *jump = emit_jump_insn (insn);
28284 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28285 }
28286
28287 /* Expand a compare and swap pattern. */
28288
28289 void
28290 arm_expand_compare_and_swap (rtx operands[])
28291 {
28292 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28293 machine_mode mode;
28294 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28295
28296 bval = operands[0];
28297 rval = operands[1];
28298 mem = operands[2];
28299 oldval = operands[3];
28300 newval = operands[4];
28301 is_weak = operands[5];
28302 mod_s = operands[6];
28303 mod_f = operands[7];
28304 mode = GET_MODE (mem);
28305
28306 /* Normally the succ memory model must be stronger than fail, but in the
28307 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28308 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28309
28310 if (TARGET_HAVE_LDACQ
28311 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28312 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28313 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28314
28315 switch (mode)
28316 {
28317 case E_QImode:
28318 case E_HImode:
28319 /* For narrow modes, we're going to perform the comparison in SImode,
28320 so do the zero-extension now. */
28321 rval = gen_reg_rtx (SImode);
28322 oldval = convert_modes (SImode, mode, oldval, true);
28323 /* FALLTHRU */
28324
28325 case E_SImode:
28326 /* Force the value into a register if needed. We waited until after
28327 the zero-extension above to do this properly. */
28328 if (!arm_add_operand (oldval, SImode))
28329 oldval = force_reg (SImode, oldval);
28330 break;
28331
28332 case E_DImode:
28333 if (!cmpdi_operand (oldval, mode))
28334 oldval = force_reg (mode, oldval);
28335 break;
28336
28337 default:
28338 gcc_unreachable ();
28339 }
28340
28341 if (TARGET_THUMB1)
28342 {
28343 switch (mode)
28344 {
28345 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28346 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28347 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28348 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28349 default:
28350 gcc_unreachable ();
28351 }
28352 }
28353 else
28354 {
28355 switch (mode)
28356 {
28357 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28358 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28359 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28360 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28361 default:
28362 gcc_unreachable ();
28363 }
28364 }
28365
28366 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28367 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28368
28369 if (mode == QImode || mode == HImode)
28370 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28371
28372 /* In all cases, we arrange for success to be signaled by Z set.
28373 This arrangement allows for the boolean result to be used directly
28374 in a subsequent branch, post optimization. For Thumb-1 targets, the
28375 boolean negation of the result is also stored in bval because the Thumb-1
28376 backend lacks dependency tracking for the CC flag, since flag-setting is
28377 not represented at the RTL level. */
28378 if (TARGET_THUMB1)
28379 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28380 else
28381 {
28382 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28383 emit_insn (gen_rtx_SET (bval, x));
28384 }
28385 }
28386
28387 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28388 another memory store between the load-exclusive and store-exclusive can
28389 reset the monitor from Exclusive to Open state. This means we must wait
28390 until after reload to split the pattern, lest we get a register spill in
28391 the middle of the atomic sequence. Success of the compare and swap is
28392 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28393 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28394 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28395
28396 void
28397 arm_split_compare_and_swap (rtx operands[])
28398 {
28399 rtx rval, mem, oldval, newval, neg_bval;
28400 machine_mode mode;
28401 enum memmodel mod_s, mod_f;
28402 bool is_weak;
28403 rtx_code_label *label1, *label2;
28404 rtx x, cond;
28405
28406 rval = operands[1];
28407 mem = operands[2];
28408 oldval = operands[3];
28409 newval = operands[4];
28410 is_weak = (operands[5] != const0_rtx);
28411 mod_s = memmodel_from_int (INTVAL (operands[6]));
28412 mod_f = memmodel_from_int (INTVAL (operands[7]));
28413 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28414 mode = GET_MODE (mem);
28415
28416 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28417
28418 bool use_acquire = TARGET_HAVE_LDACQ
28419 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28420 || is_mm_release (mod_s));
28421
28422 bool use_release = TARGET_HAVE_LDACQ
28423 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28424 || is_mm_acquire (mod_s));
28425
28426 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28427 a full barrier is emitted after the store-release. */
28428 if (is_armv8_sync)
28429 use_acquire = false;
28430
28431 /* Checks whether a barrier is needed and emits one accordingly. */
28432 if (!(use_acquire || use_release))
28433 arm_pre_atomic_barrier (mod_s);
28434
28435 label1 = NULL;
28436 if (!is_weak)
28437 {
28438 label1 = gen_label_rtx ();
28439 emit_label (label1);
28440 }
28441 label2 = gen_label_rtx ();
28442
28443 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28444
28445 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28446 as required to communicate with arm_expand_compare_and_swap. */
28447 if (TARGET_32BIT)
28448 {
28449 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28450 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28451 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28452 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28453 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28454 }
28455 else
28456 {
28457 emit_move_insn (neg_bval, const1_rtx);
28458 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28459 if (thumb1_cmpneg_operand (oldval, SImode))
28460 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28461 label2, cond));
28462 else
28463 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28464 }
28465
28466 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28467
28468 /* Weak or strong, we want EQ to be true for success, so that we
28469 match the flags that we got from the compare above. */
28470 if (TARGET_32BIT)
28471 {
28472 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28473 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28474 emit_insn (gen_rtx_SET (cond, x));
28475 }
28476
28477 if (!is_weak)
28478 {
28479 /* Z is set to boolean value of !neg_bval, as required to communicate
28480 with arm_expand_compare_and_swap. */
28481 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28482 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28483 }
28484
28485 if (!is_mm_relaxed (mod_f))
28486 emit_label (label2);
28487
28488 /* Checks whether a barrier is needed and emits one accordingly. */
28489 if (is_armv8_sync
28490 || !(use_acquire || use_release))
28491 arm_post_atomic_barrier (mod_s);
28492
28493 if (is_mm_relaxed (mod_f))
28494 emit_label (label2);
28495 }
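
/* For reference, the strong SImode expansion is roughly as follows
   (illustrative; register names are placeholders and barriers and the
   acquire/release variants are omitted):
     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Ldone
       strex   tmp, newval, [mem]
       cmp     tmp, #0
       bne     .Lretry
     .Ldone:
   Success is then visible to the caller as the Z flag being set.  */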
28496
28497 /* Split an atomic operation pattern. Operation is given by CODE and is one
28498 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28499 operation). Operation is performed on the content at MEM and on VALUE
28500 following the memory model MODEL_RTX. The content at MEM before and after
28501 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28502 success of the operation is returned in COND. Using a scratch register or
28503 an operand register for these determines what result is returned for that
28504 pattern. */
28505
28506 void
28507 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28508 rtx value, rtx model_rtx, rtx cond)
28509 {
28510 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28511 machine_mode mode = GET_MODE (mem);
28512 machine_mode wmode = (mode == DImode ? DImode : SImode);
28513 rtx_code_label *label;
28514 bool all_low_regs, bind_old_new;
28515 rtx x;
28516
28517 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28518
28519 bool use_acquire = TARGET_HAVE_LDACQ
28520 && !(is_mm_relaxed (model) || is_mm_consume (model)
28521 || is_mm_release (model));
28522
28523 bool use_release = TARGET_HAVE_LDACQ
28524 && !(is_mm_relaxed (model) || is_mm_consume (model)
28525 || is_mm_acquire (model));
28526
28527 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28528 a full barrier is emitted after the store-release. */
28529 if (is_armv8_sync)
28530 use_acquire = false;
28531
28532 /* Checks whether a barrier is needed and emits one accordingly. */
28533 if (!(use_acquire || use_release))
28534 arm_pre_atomic_barrier (model);
28535
28536 label = gen_label_rtx ();
28537 emit_label (label);
28538
28539 if (new_out)
28540 new_out = gen_lowpart (wmode, new_out);
28541 if (old_out)
28542 old_out = gen_lowpart (wmode, old_out);
28543 else
28544 old_out = new_out;
28545 value = simplify_gen_subreg (wmode, value, mode, 0);
28546
28547 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28548
28549 /* Does the operation require destination and first operand to use the same
28550 register? This is decided by register constraints of relevant insn
28551 patterns in thumb1.md. */
28552 gcc_assert (!new_out || REG_P (new_out));
28553 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28554 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28555 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28556 bind_old_new =
28557 (TARGET_THUMB1
28558 && code != SET
28559 && code != MINUS
28560 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28561
28562 /* We want to return the old value while putting the result of the operation
28563 in the same register as the old value so copy the old value over to the
28564 destination register and use that register for the operation. */
28565 if (old_out && bind_old_new)
28566 {
28567 emit_move_insn (new_out, old_out);
28568 old_out = new_out;
28569 }
28570
28571 switch (code)
28572 {
28573 case SET:
28574 new_out = value;
28575 break;
28576
28577 case NOT:
28578 x = gen_rtx_AND (wmode, old_out, value);
28579 emit_insn (gen_rtx_SET (new_out, x));
28580 x = gen_rtx_NOT (wmode, new_out);
28581 emit_insn (gen_rtx_SET (new_out, x));
28582 break;
28583
28584 case MINUS:
28585 if (CONST_INT_P (value))
28586 {
28587 value = GEN_INT (-INTVAL (value));
28588 code = PLUS;
28589 }
28590 /* FALLTHRU */
28591
28592 case PLUS:
28593 if (mode == DImode)
28594 {
28595 /* DImode plus/minus need to clobber flags. */
28596 /* The adddi3 and subdi3 patterns are incorrectly written so that
28597 they require matching operands, even when we could easily support
28598 three operands. Thankfully, this can be fixed up post-splitting,
28599 as the individual add+adc patterns do accept three operands and
28600 post-reload cprop can make these moves go away. */
28601 emit_move_insn (new_out, old_out);
28602 if (code == PLUS)
28603 x = gen_adddi3 (new_out, new_out, value);
28604 else
28605 x = gen_subdi3 (new_out, new_out, value);
28606 emit_insn (x);
28607 break;
28608 }
28609 /* FALLTHRU */
28610
28611 default:
28612 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28613 emit_insn (gen_rtx_SET (new_out, x));
28614 break;
28615 }
28616
28617 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28618 use_release);
28619
28620 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28621 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28622
28623 /* Checks whether a barrier is needed and emits one accordingly. */
28624 if (is_armv8_sync
28625 || !(use_acquire || use_release))
28626 arm_post_atomic_barrier (model);
28627 }
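
/* For reference, an SImode atomic add expands to roughly the following
   loop (illustrative; register names are placeholders and barriers are
   omitted):
     .Lretry:
       ldrex   old, [mem]
       add     new, old, value
       strex   cond, new, [mem]
       cmp     cond, #0
       bne     .Lretry
   OLD_OUT then holds the value seen before the operation and NEW_OUT the
   value stored by it.  */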
28628 \f
28629 #define MAX_VECT_LEN 16
28630
28631 struct expand_vec_perm_d
28632 {
28633 rtx target, op0, op1;
28634 unsigned char perm[MAX_VECT_LEN];
28635 machine_mode vmode;
28636 unsigned char nelt;
28637 bool one_vector_p;
28638 bool testing_p;
28639 };
28640
28641 /* Generate a variable permutation. */
28642
28643 static void
28644 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28645 {
28646 machine_mode vmode = GET_MODE (target);
28647 bool one_vector_p = rtx_equal_p (op0, op1);
28648
28649 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28650 gcc_checking_assert (GET_MODE (op0) == vmode);
28651 gcc_checking_assert (GET_MODE (op1) == vmode);
28652 gcc_checking_assert (GET_MODE (sel) == vmode);
28653 gcc_checking_assert (TARGET_NEON);
28654
28655 if (one_vector_p)
28656 {
28657 if (vmode == V8QImode)
28658 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28659 else
28660 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28661 }
28662 else
28663 {
28664 rtx pair;
28665
28666 if (vmode == V8QImode)
28667 {
28668 pair = gen_reg_rtx (V16QImode);
28669 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28670 pair = gen_lowpart (TImode, pair);
28671 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28672 }
28673 else
28674 {
28675 pair = gen_reg_rtx (OImode);
28676 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28677 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28678 }
28679 }
28680 }
28681
28682 void
28683 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28684 {
28685 machine_mode vmode = GET_MODE (target);
28686 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28687 bool one_vector_p = rtx_equal_p (op0, op1);
28688 rtx rmask[MAX_VECT_LEN], mask;
28689
28690 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28691 numbering of elements for big-endian, we must reverse the order. */
28692 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28693
28694 /* The VTBL instruction does not use a modulo index, so we must take care
28695 of that ourselves. */
28696 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28697 for (i = 0; i < nelt; ++i)
28698 rmask[i] = mask;
28699 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28700 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28701
28702 arm_expand_vec_perm_1 (target, op0, op1, sel);
28703 }
28704
28705 /* Map lane ordering between architectural lane order, and GCC lane order,
28706 taking into account ABI. See comment above output_move_neon for details. */
28707
28708 static int
28709 neon_endian_lane_map (machine_mode mode, int lane)
28710 {
28711 if (BYTES_BIG_ENDIAN)
28712 {
28713 int nelems = GET_MODE_NUNITS (mode);
28714 /* Reverse lane order. */
28715 lane = (nelems - 1 - lane);
28716 /* Reverse D register order, to match ABI. */
28717 if (GET_MODE_SIZE (mode) == 16)
28718 lane = lane ^ (nelems / 2);
28719 }
28720 return lane;
28721 }
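
/* Worked example (V4SImode chosen purely for illustration): on a
   big-endian target the mapping above first reverses the lane order
   (lane -> 3 - lane) and then, because the mode is 16 bytes wide,
   XORs with nelems / 2 = 2 to swap the two D registers, giving
   0 -> 1, 1 -> 0, 2 -> 3 and 3 -> 2.  */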
28722
28723 /* Some permutations index into pairs of vectors; this is a helper function
28724 to map indexes into those pairs of vectors. */
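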
28725
28726 static int
28727 neon_pair_endian_lane_map (machine_mode mode, int lane)
28728 {
28729 int nelem = GET_MODE_NUNITS (mode);
28730 if (BYTES_BIG_ENDIAN)
28731 lane =
28732 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28733 return lane;
28734 }
28735
28736 /* Generate or test for an insn that supports a constant permutation. */
28737
28738 /* Recognize patterns for the VUZP insns. */
28739
28740 static bool
28741 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28742 {
28743 unsigned int i, odd, mask, nelt = d->nelt;
28744 rtx out0, out1, in0, in1;
28745 rtx (*gen)(rtx, rtx, rtx, rtx);
28746 int first_elem;
28747 int swap_nelt;
28748
28749 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28750 return false;
28751
28752 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28753 big-endian pattern on 64-bit vectors, so we correct for that. */
28754 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28755 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28756
28757 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28758
28759 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28760 odd = 0;
28761 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28762 odd = 1;
28763 else
28764 return false;
28765 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28766
28767 for (i = 0; i < nelt; i++)
28768 {
28769 unsigned elt =
28770 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28771 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28772 return false;
28773 }
28774
28775 /* Success! */
28776 if (d->testing_p)
28777 return true;
28778
28779 switch (d->vmode)
28780 {
28781 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28782 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28783 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28784 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28785 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28786 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28787 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28788 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28789 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28790 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28791 default:
28792 gcc_unreachable ();
28793 }
28794
28795 in0 = d->op0;
28796 in1 = d->op1;
28797 if (swap_nelt != 0)
28798 std::swap (in0, in1);
28799
28800 out0 = d->target;
28801 out1 = gen_reg_rtx (d->vmode);
28802 if (odd)
28803 std::swap (out0, out1);
28804
28805 emit_insn (gen (out0, in0, in1, out1));
28806 return true;
28807 }
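
/* Example of a selector accepted above (little-endian, two V8QImode
   operands, so nelt = 8 and mask = 15): the even-element selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } gives odd = 0 and the odd-element
   selector { 1, 3, 5, 7, 9, 11, 13, 15 } gives odd = 1; both are
   implemented with a single VUZP of OP0 and OP1, with the unwanted
   half of the result going to a scratch register.  */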
28808
28809 /* Recognize patterns for the VZIP insns. */
28810
28811 static bool
28812 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28813 {
28814 unsigned int i, high, mask, nelt = d->nelt;
28815 rtx out0, out1, in0, in1;
28816 rtx (*gen)(rtx, rtx, rtx, rtx);
28817 int first_elem;
28818 bool is_swapped;
28819
28820 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28821 return false;
28822
28823 is_swapped = BYTES_BIG_ENDIAN;
28824
28825 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28826
28827 high = nelt / 2;
28828 if (first_elem == neon_endian_lane_map (d->vmode, high))
28829 ;
28830 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28831 high = 0;
28832 else
28833 return false;
28834 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28835
28836 for (i = 0; i < nelt / 2; i++)
28837 {
28838 unsigned elt =
28839 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28840 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28841 != elt)
28842 return false;
28843 elt =
28844 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28845 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28846 != elt)
28847 return false;
28848 }
28849
28850 /* Success! */
28851 if (d->testing_p)
28852 return true;
28853
28854 switch (d->vmode)
28855 {
28856 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28857 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28858 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28859 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28860 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28861 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28862 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28863 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28864 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28865 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28866 default:
28867 gcc_unreachable ();
28868 }
28869
28870 in0 = d->op0;
28871 in1 = d->op1;
28872 if (is_swapped)
28873 std::swap (in0, in1);
28874
28875 out0 = d->target;
28876 out1 = gen_reg_rtx (d->vmode);
28877 if (high)
28878 std::swap (out0, out1);
28879
28880 emit_insn (gen (out0, in0, in1, out1));
28881 return true;
28882 }
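
/* Example of a selector accepted above (little-endian, two V8QImode
   operands): { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves
   of OP0 and OP1 (high = 0), while { 4, 12, 5, 13, 6, 14, 7, 15 }
   interleaves the high halves (high = nelt / 2); both map onto a
   single VZIP, with the unwanted half of the result discarded into
   a scratch register.  */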
28883
28884 /* Recognize patterns for the VREV insns. */
28885
28886 static bool
28887 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28888 {
28889 unsigned int i, j, diff, nelt = d->nelt;
28890 rtx (*gen)(rtx, rtx);
28891
28892 if (!d->one_vector_p)
28893 return false;
28894
28895 diff = d->perm[0];
28896 switch (diff)
28897 {
28898 case 7:
28899 switch (d->vmode)
28900 {
28901 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28902 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28903 default:
28904 return false;
28905 }
28906 break;
28907 case 3:
28908 switch (d->vmode)
28909 {
28910 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28911 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28912 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28913 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28914 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28915 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28916 default:
28917 return false;
28918 }
28919 break;
28920 case 1:
28921 switch (d->vmode)
28922 {
28923 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28924 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28925 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28926 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28927 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28928 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28929 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28930 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28931 default:
28932 return false;
28933 }
28934 break;
28935 default:
28936 return false;
28937 }
28938
28939 for (i = 0; i < nelt ; i += diff + 1)
28940 for (j = 0; j <= diff; j += 1)
28941 {
28942 /* This is guaranteed to be true as the value of diff
28943 is 7, 3 or 1 and we should have enough elements in the
28944 queue to generate this. Getting a vector mask with a
28945 value of diff other than these values implies that
28946 something is wrong by the time we get here. */
28947 gcc_assert (i + j < nelt);
28948 if (d->perm[i + j] != i + diff - j)
28949 return false;
28950 }
28951
28952 /* Success! */
28953 if (d->testing_p)
28954 return true;
28955
28956 emit_insn (gen (d->target, d->op0));
28957 return true;
28958 }
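
/* Examples of selectors accepted above (one-vector case only):
   for V8QImode, { 3, 2, 1, 0, 7, 6, 5, 4 } has diff = 3 and becomes
   a VREV32.8 (reverse the bytes within each 32-bit group), while
   { 7, 6, 5, 4, 3, 2, 1, 0 } has diff = 7 and becomes a VREV64.8.  */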
28959
28960 /* Recognize patterns for the VTRN insns. */
28961
28962 static bool
28963 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28964 {
28965 unsigned int i, odd, mask, nelt = d->nelt;
28966 rtx out0, out1, in0, in1;
28967 rtx (*gen)(rtx, rtx, rtx, rtx);
28968
28969 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28970 return false;
28971
28972 /* Note that these are little-endian tests. Adjust for big-endian later. */
28973 if (d->perm[0] == 0)
28974 odd = 0;
28975 else if (d->perm[0] == 1)
28976 odd = 1;
28977 else
28978 return false;
28979 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28980
28981 for (i = 0; i < nelt; i += 2)
28982 {
28983 if (d->perm[i] != i + odd)
28984 return false;
28985 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28986 return false;
28987 }
28988
28989 /* Success! */
28990 if (d->testing_p)
28991 return true;
28992
28993 switch (d->vmode)
28994 {
28995 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28996 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28997 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28998 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28999 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29000 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29001 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29002 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29003 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29004 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29005 default:
29006 gcc_unreachable ();
29007 }
29008
29009 in0 = d->op0;
29010 in1 = d->op1;
29011 if (BYTES_BIG_ENDIAN)
29012 {
29013 std::swap (in0, in1);
29014 odd = !odd;
29015 }
29016
29017 out0 = d->target;
29018 out1 = gen_reg_rtx (d->vmode);
29019 if (odd)
29020 std::swap (out0, out1);
29021
29022 emit_insn (gen (out0, in0, in1, out1));
29023 return true;
29024 }
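
/* Example of a selector accepted above (little-endian, two V8QImode
   operands): { 0, 8, 2, 10, 4, 12, 6, 14 } gives odd = 0 and selects
   lanes op0[0], op1[0], op0[2], op1[2], ..., i.e. one output of a
   VTRN.8 that transposes OP0 and OP1 viewed as 2x2 element blocks;
   { 1, 9, 3, 11, 5, 13, 7, 15 } selects the other output.  */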
29025
29026 /* Recognize patterns for the VEXT insns. */
29027
29028 static bool
29029 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29030 {
29031 unsigned int i, nelt = d->nelt;
29032 rtx (*gen) (rtx, rtx, rtx, rtx);
29033 rtx offset;
29034
29035 unsigned int location;
29036
29037 unsigned int next = d->perm[0] + 1;
29038
29039 /* TODO: Handle GCC's numbering of elements for big-endian. */
29040 if (BYTES_BIG_ENDIAN)
29041 return false;
29042
29043 /* Check if the extracted indexes are increasing by one. */
29044 for (i = 1; i < nelt; next++, i++)
29045 {
29046 /* If we hit the most significant element of the 2nd vector in
29047 the previous iteration, no need to test further. */
29048 if (next == 2 * nelt)
29049 return false;
29050
29051 /* If we are operating on only one vector, it could be a
29052 rotation. If there are only two elements of size < 64, let
29053 arm_evpc_neon_vrev catch it. */
29054 if (d->one_vector_p && (next == nelt))
29055 {
29056 if ((nelt == 2) && (d->vmode != V2DImode))
29057 return false;
29058 else
29059 next = 0;
29060 }
29061
29062 if (d->perm[i] != next)
29063 return false;
29064 }
29065
29066 location = d->perm[0];
29067
29068 switch (d->vmode)
29069 {
29070 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29071 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29072 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29073 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29074 case E_V2SImode: gen = gen_neon_vextv2si; break;
29075 case E_V4SImode: gen = gen_neon_vextv4si; break;
29076 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29077 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29078 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29079 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29080 case E_V2DImode: gen = gen_neon_vextv2di; break;
29081 default:
29082 return false;
29083 }
29084
29085 /* Success! */
29086 if (d->testing_p)
29087 return true;
29088
29089 offset = GEN_INT (location);
29090 emit_insn (gen (d->target, d->op0, d->op1, offset));
29091 return true;
29092 }
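
/* Example of a selector accepted above (little-endian, two V8QImode
   operands): { 3, 4, 5, 6, 7, 8, 9, 10 } is a run of consecutive
   indexes starting at 3, so location = 3 and the permutation is a
   single VEXT.8 extracting bytes 3..10 of the OP0:OP1 concatenation.  */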
29093
29094 /* The NEON VTBL instruction is a fully variable permutation that's even
29095 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29096 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29097 can do slightly better by expanding this as a constant where we don't
29098 have to apply a mask. */
29099
29100 static bool
29101 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29102 {
29103 rtx rperm[MAX_VECT_LEN], sel;
29104 machine_mode vmode = d->vmode;
29105 unsigned int i, nelt = d->nelt;
29106
29107 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29108 numbering of elements for big-endian, we must reverse the order. */
29109 if (BYTES_BIG_ENDIAN)
29110 return false;
29111
29112 if (d->testing_p)
29113 return true;
29114
29115 /* Generic code will try constant permutation twice. Once with the
29116 original mode and again with the elements lowered to QImode.
29117 So wait and don't do the selector expansion ourselves. */
29118 if (vmode != V8QImode && vmode != V16QImode)
29119 return false;
29120
29121 for (i = 0; i < nelt; ++i)
29122 rperm[i] = GEN_INT (d->perm[i]);
29123 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29124 sel = force_reg (vmode, sel);
29125
29126 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29127 return true;
29128 }
29129
29130 static bool
29131 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29132 {
29133 /* Check if the input mask matches vext before reordering the
29134 operands. */
29135 if (TARGET_NEON)
29136 if (arm_evpc_neon_vext (d))
29137 return true;
29138
29139 /* The pattern matching functions above are written to look for a small
29140 number to begin the sequence (0, 1, N/2). If we begin with an index
29141 from the second operand, we can swap the operands. */
29142 if (d->perm[0] >= d->nelt)
29143 {
29144 unsigned i, nelt = d->nelt;
29145
29146 for (i = 0; i < nelt; ++i)
29147 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29148
29149 std::swap (d->op0, d->op1);
29150 }
29151
29152 if (TARGET_NEON)
29153 {
29154 if (arm_evpc_neon_vuzp (d))
29155 return true;
29156 if (arm_evpc_neon_vzip (d))
29157 return true;
29158 if (arm_evpc_neon_vrev (d))
29159 return true;
29160 if (arm_evpc_neon_vtrn (d))
29161 return true;
29162 return arm_evpc_neon_vtbl (d);
29163 }
29164 return false;
29165 }
29166
29167 /* Expand a vec_perm_const pattern. */
29168
29169 bool
29170 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29171 {
29172 struct expand_vec_perm_d d;
29173 int i, nelt, which;
29174
29175 d.target = target;
29176 d.op0 = op0;
29177 d.op1 = op1;
29178
29179 d.vmode = GET_MODE (target);
29180 gcc_assert (VECTOR_MODE_P (d.vmode));
29181 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29182 d.testing_p = false;
29183
29184 for (i = which = 0; i < nelt; ++i)
29185 {
29186 rtx e = XVECEXP (sel, 0, i);
29187 int ei = INTVAL (e) & (2 * nelt - 1);
29188 which |= (ei < nelt ? 1 : 2);
29189 d.perm[i] = ei;
29190 }
29191
29192 switch (which)
29193 {
29194 default:
29195 gcc_unreachable();
29196
29197 case 3:
29198 d.one_vector_p = false;
29199 if (!rtx_equal_p (op0, op1))
29200 break;
29201
29202 /* The elements of PERM do not suggest that only the first operand
29203 is used, but both operands are identical. Allow easier matching
29204 of the permutation by folding the permutation into the single
29205 input vector. */
29206 /* FALLTHRU */
29207 case 2:
29208 for (i = 0; i < nelt; ++i)
29209 d.perm[i] &= nelt - 1;
29210 d.op0 = op1;
29211 d.one_vector_p = true;
29212 break;
29213
29214 case 1:
29215 d.op1 = op0;
29216 d.one_vector_p = true;
29217 break;
29218 }
29219
29220 return arm_expand_vec_perm_const_1 (&d);
29221 }
29222
29223 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29224
29225 static bool
29226 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29227 const unsigned char *sel)
29228 {
29229 struct expand_vec_perm_d d;
29230 unsigned int i, nelt, which;
29231 bool ret;
29232
29233 d.vmode = vmode;
29234 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29235 d.testing_p = true;
29236 memcpy (d.perm, sel, nelt);
29237
29238 /* Categorize the set of elements in the selector. */
29239 for (i = which = 0; i < nelt; ++i)
29240 {
29241 unsigned char e = d.perm[i];
29242 gcc_assert (e < 2 * nelt);
29243 which |= (e < nelt ? 1 : 2);
29244 }
29245
29246 /* If all elements come from the second vector, fold them into the first. */
29247 if (which == 2)
29248 for (i = 0; i < nelt; ++i)
29249 d.perm[i] -= nelt;
29250
29251 /* Check whether the mask can be applied to the vector type. */
29252 d.one_vector_p = (which != 3);
29253
29254 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29255 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29256 if (!d.one_vector_p)
29257 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29258
29259 start_sequence ();
29260 ret = arm_expand_vec_perm_const_1 (&d);
29261 end_sequence ();
29262
29263 return ret;
29264 }
29265
29266 bool
29267 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29268 {
29269 /* If we are soft float and we do not have ldrd
29270 then all auto increment forms are ok. */
29271 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29272 return true;
29273
29274 switch (code)
29275 {
29276 /* Post-increment and pre-decrement are supported for all
29277 instruction forms except for vector forms. */
29278 case ARM_POST_INC:
29279 case ARM_PRE_DEC:
29280 if (VECTOR_MODE_P (mode))
29281 {
29282 if (code != ARM_PRE_DEC)
29283 return true;
29284 else
29285 return false;
29286 }
29287
29288 return true;
29289
29290 case ARM_POST_DEC:
29291 case ARM_PRE_INC:
29292 /* Without LDRD and mode size greater than
29293 word size, there is no point in auto-incrementing
29294 because ldm and stm will not have these forms. */
29295 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29296 return false;
29297
29298 /* Vector and floating point modes do not support
29299 these auto increment forms. */
29300 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29301 return false;
29302
29303 return true;
29304
29305 default:
29306 return false;
29307
29308 }
29309
29310 return false;
29311 }
29312
29313 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29314 on ARM, since we know that shifts by negative amounts are no-ops.
29315 Additionally, the default expansion code is not available or suitable
29316 for post-reload insn splits (this can occur when the register allocator
29317 chooses not to do a shift in NEON).
29318
29319 This function is used in both initial expand and post-reload splits, and
29320 handles all kinds of 64-bit shifts.
29321
29322 Input requirements:
29323 - It is safe for the input and output to be the same register, but
29324 early-clobber rules apply for the shift amount and scratch registers.
29325 - Shift by register requires both scratch registers. In all other cases
29326 the scratch registers may be NULL.
29327 - Ashiftrt by a register also clobbers the CC register. */
29328 void
29329 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29330 rtx amount, rtx scratch1, rtx scratch2)
29331 {
29332 rtx out_high = gen_highpart (SImode, out);
29333 rtx out_low = gen_lowpart (SImode, out);
29334 rtx in_high = gen_highpart (SImode, in);
29335 rtx in_low = gen_lowpart (SImode, in);
29336
29337 /* Terminology:
29338 in = the register pair containing the input value.
29339 out = the destination register pair.
29340 up = the high- or low-part of each pair.
29341 down = the opposite part to "up".
29342 In a shift, we can consider bits to shift from "up"-stream to
29343 "down"-stream, so in a left-shift "up" is the low-part and "down"
29344 is the high-part of each register pair. */
29345
29346 rtx out_up = code == ASHIFT ? out_low : out_high;
29347 rtx out_down = code == ASHIFT ? out_high : out_low;
29348 rtx in_up = code == ASHIFT ? in_low : in_high;
29349 rtx in_down = code == ASHIFT ? in_high : in_low;
29350
29351 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29352 gcc_assert (out
29353 && (REG_P (out) || GET_CODE (out) == SUBREG)
29354 && GET_MODE (out) == DImode);
29355 gcc_assert (in
29356 && (REG_P (in) || GET_CODE (in) == SUBREG)
29357 && GET_MODE (in) == DImode);
29358 gcc_assert (amount
29359 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29360 && GET_MODE (amount) == SImode)
29361 || CONST_INT_P (amount)));
29362 gcc_assert (scratch1 == NULL
29363 || (GET_CODE (scratch1) == SCRATCH)
29364 || (GET_MODE (scratch1) == SImode
29365 && REG_P (scratch1)));
29366 gcc_assert (scratch2 == NULL
29367 || (GET_CODE (scratch2) == SCRATCH)
29368 || (GET_MODE (scratch2) == SImode
29369 && REG_P (scratch2)));
29370 gcc_assert (!REG_P (out) || !REG_P (amount)
29371 || !HARD_REGISTER_P (out)
29372 || (REGNO (out) != REGNO (amount)
29373 && REGNO (out) + 1 != REGNO (amount)));
29374
29375 /* Macros to make following code more readable. */
29376 #define SUB_32(DEST,SRC) \
29377 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29378 #define RSB_32(DEST,SRC) \
29379 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29380 #define SUB_S_32(DEST,SRC) \
29381 gen_addsi3_compare0 ((DEST), (SRC), \
29382 GEN_INT (-32))
29383 #define SET(DEST,SRC) \
29384 gen_rtx_SET ((DEST), (SRC))
29385 #define SHIFT(CODE,SRC,AMOUNT) \
29386 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29387 #define LSHIFT(CODE,SRC,AMOUNT) \
29388 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29389 SImode, (SRC), (AMOUNT))
29390 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29391 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29392 SImode, (SRC), (AMOUNT))
29393 #define ORR(A,B) \
29394 gen_rtx_IOR (SImode, (A), (B))
29395 #define BRANCH(COND,LABEL) \
29396 gen_arm_cond_branch ((LABEL), \
29397 gen_rtx_ ## COND (CCmode, cc_reg, \
29398 const0_rtx), \
29399 cc_reg)
29400
29401 /* Shifts by register and shifts by constant are handled separately. */
29402 if (CONST_INT_P (amount))
29403 {
29404 /* We have a shift-by-constant. */
29405
29406 /* First, handle out-of-range shift amounts.
29407 In both cases we try to match the result an ARM instruction in a
29408 shift-by-register would give. This helps reduce execution
29409 differences between optimization levels, but it won't stop other
29410 parts of the compiler doing different things. This is "undefined"
29411 behavior, in any case. */
29412 if (INTVAL (amount) <= 0)
29413 emit_insn (gen_movdi (out, in));
29414 else if (INTVAL (amount) >= 64)
29415 {
29416 if (code == ASHIFTRT)
29417 {
29418 rtx const31_rtx = GEN_INT (31);
29419 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29420 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29421 }
29422 else
29423 emit_insn (gen_movdi (out, const0_rtx));
29424 }
29425
29426 /* Now handle valid shifts. */
29427 else if (INTVAL (amount) < 32)
29428 {
29429 /* Shifts by a constant less than 32. */
29430 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29431
29432 /* Clearing the out register in DImode first avoids lots
29433 of spilling and results in less stack usage.
29434 Later this redundant insn is completely removed.
29435 Do that only if "in" and "out" are different registers. */
29436 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29437 emit_insn (SET (out, const0_rtx));
29438 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29439 emit_insn (SET (out_down,
29440 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29441 out_down)));
29442 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29443 }
29444 else
29445 {
29446 /* Shifts by a constant greater than 31. */
29447 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29448
29449 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29450 emit_insn (SET (out, const0_rtx));
29451 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29452 if (code == ASHIFTRT)
29453 emit_insn (gen_ashrsi3 (out_up, in_up,
29454 GEN_INT (31)));
29455 else
29456 emit_insn (SET (out_up, const0_rtx));
29457 }
29458 }
29459 else
29460 {
29461 /* We have a shift-by-register. */
29462 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29463
29464 /* This alternative requires the scratch registers. */
29465 gcc_assert (scratch1 && REG_P (scratch1));
29466 gcc_assert (scratch2 && REG_P (scratch2));
29467
29468 /* We will need the values "amount-32" and "32-amount" later.
29469 Swapping them around now allows the later code to be more general. */
29470 switch (code)
29471 {
29472 case ASHIFT:
29473 emit_insn (SUB_32 (scratch1, amount));
29474 emit_insn (RSB_32 (scratch2, amount));
29475 break;
29476 case ASHIFTRT:
29477 emit_insn (RSB_32 (scratch1, amount));
29478 /* Also set CC = amount > 32. */
29479 emit_insn (SUB_S_32 (scratch2, amount));
29480 break;
29481 case LSHIFTRT:
29482 emit_insn (RSB_32 (scratch1, amount));
29483 emit_insn (SUB_32 (scratch2, amount));
29484 break;
29485 default:
29486 gcc_unreachable ();
29487 }
29488
29489 /* Emit code like this:
29490
29491 arithmetic-left:
29492 out_down = in_down << amount;
29493 out_down = (in_up << (amount - 32)) | out_down;
29494 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29495 out_up = in_up << amount;
29496
29497 arithmetic-right:
29498 out_down = in_down >> amount;
29499 out_down = (in_up << (32 - amount)) | out_down;
29500 if (amount < 32)
29501 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29502 out_up = in_up << amount;
29503
29504 logical-right:
29505 out_down = in_down >> amount;
29506 out_down = (in_up << (32 - amount)) | out_down;
29507 if (amount < 32)
29508 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29509 out_up = in_up << amount;
29510
29511 The ARM and Thumb2 variants are the same but implemented slightly
29512 differently. If this were only called during expand we could just
29513 use the Thumb2 case and let combine do the right thing, but this
29514 can also be called from post-reload splitters. */
29515
29516 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29517
29518 if (!TARGET_THUMB2)
29519 {
29520 /* Emit code for ARM mode. */
29521 emit_insn (SET (out_down,
29522 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29523 if (code == ASHIFTRT)
29524 {
29525 rtx_code_label *done_label = gen_label_rtx ();
29526 emit_jump_insn (BRANCH (LT, done_label));
29527 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29528 out_down)));
29529 emit_label (done_label);
29530 }
29531 else
29532 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29533 out_down)));
29534 }
29535 else
29536 {
29537 /* Emit code for Thumb2 mode.
29538 Thumb2 can't do shift and or in one insn. */
29539 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29540 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29541
29542 if (code == ASHIFTRT)
29543 {
29544 rtx_code_label *done_label = gen_label_rtx ();
29545 emit_jump_insn (BRANCH (LT, done_label));
29546 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29547 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29548 emit_label (done_label);
29549 }
29550 else
29551 {
29552 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29553 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29554 }
29555 }
29556
29557 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29558 }
29559
29560 #undef SUB_32
29561 #undef RSB_32
29562 #undef SUB_S_32
29563 #undef SET
29564 #undef SHIFT
29565 #undef LSHIFT
29566 #undef REV_LSHIFT
29567 #undef ORR
29568 #undef BRANCH
29569 }
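
/* Worked example of the shift-by-constant path above (values chosen
   for illustration): a 64-bit left shift by k with 0 < k < 32 is
   emitted as
       out_high = in_high << k;
       out_high |= (unsigned) in_low >> (32 - k);
       out_low = in_low << k;
   while an arithmetic right shift by 64 or more replicates the sign,
   setting both halves to in_high >> 31.  */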
29570
29571 /* Returns true if the pattern is a valid symbolic address, which is either a
29572 symbol_ref or (symbol_ref + addend).
29573
29574 According to the ARM ELF ABI, the initial addend of REL-type relocations
29575 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29576 literal field of the instruction as a 16-bit signed value in the range
29577 -32768 <= A < 32768. */
29578
29579 bool
29580 arm_valid_symbolic_address_p (rtx addr)
29581 {
29582 rtx xop0, xop1 = NULL_RTX;
29583 rtx tmp = addr;
29584
29585 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29586 return true;
29587
29588 /* (const (plus: symbol_ref const_int)) */
29589 if (GET_CODE (addr) == CONST)
29590 tmp = XEXP (addr, 0);
29591
29592 if (GET_CODE (tmp) == PLUS)
29593 {
29594 xop0 = XEXP (tmp, 0);
29595 xop1 = XEXP (tmp, 1);
29596
29597 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29598 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29599 }
29600
29601 return false;
29602 }
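
/* For example, (symbol_ref "sym") and
   (const (plus (symbol_ref "sym") (const_int 4))) are both accepted,
   whereas an addend of 32768 is rejected because it cannot be
   represented in the signed 16-bit literal field described above.  */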
29603
29604 /* Returns true if COMPARISON is a valid comparison operation and puts
29605 the operands into a form that is valid. */
29606 bool
29607 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29608 {
29609 enum rtx_code code = GET_CODE (*comparison);
29610 int code_int;
29611 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29612 ? GET_MODE (*op2) : GET_MODE (*op1);
29613
29614 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29615
29616 if (code == UNEQ || code == LTGT)
29617 return false;
29618
29619 code_int = (int)code;
29620 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29621 PUT_CODE (*comparison, (enum rtx_code)code_int);
29622
29623 switch (mode)
29624 {
29625 case E_SImode:
29626 if (!arm_add_operand (*op1, mode))
29627 *op1 = force_reg (mode, *op1);
29628 if (!arm_add_operand (*op2, mode))
29629 *op2 = force_reg (mode, *op2);
29630 return true;
29631
29632 case E_DImode:
29633 if (!cmpdi_operand (*op1, mode))
29634 *op1 = force_reg (mode, *op1);
29635 if (!cmpdi_operand (*op2, mode))
29636 *op2 = force_reg (mode, *op2);
29637 return true;
29638
29639 case E_HFmode:
29640 if (!TARGET_VFP_FP16INST)
29641 break;
29642 /* FP16 comparisons are done in SF mode. */
29643 mode = SFmode;
29644 *op1 = convert_to_mode (mode, *op1, 1);
29645 *op2 = convert_to_mode (mode, *op2, 1);
29646 /* Fall through. */
29647 case E_SFmode:
29648 case E_DFmode:
29649 if (!vfp_compare_operand (*op1, mode))
29650 *op1 = force_reg (mode, *op1);
29651 if (!vfp_compare_operand (*op2, mode))
29652 *op2 = force_reg (mode, *op2);
29653 return true;
29654 default:
29655 break;
29656 }
29657
29658 return false;
29659
29660 }
29661
29662 /* Maximum number of instructions to set block of memory. */
29663 static int
29664 arm_block_set_max_insns (void)
29665 {
29666 if (optimize_function_for_size_p (cfun))
29667 return 4;
29668 else
29669 return current_tune->max_insns_inline_memset;
29670 }
29671
29672 /* Return TRUE if it's profitable to set block of memory for
29673 non-vectorized case. VAL is the value to set the memory
29674 with. LENGTH is the number of bytes to set. ALIGN is the
29675 alignment of the destination memory in bytes. UNALIGNED_P
29676 is TRUE if we can only set the memory with instructions
29677 meeting alignment requirements. USE_STRD_P is TRUE if we
29678 can use strd to set the memory. */
29679 static bool
29680 arm_block_set_non_vect_profit_p (rtx val,
29681 unsigned HOST_WIDE_INT length,
29682 unsigned HOST_WIDE_INT align,
29683 bool unaligned_p, bool use_strd_p)
29684 {
29685 int num = 0;
29686 /* For a leftover of 0-7 bytes, we can set the memory block using
29687 strb/strh/str with the minimum number of instructions. */
29688 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29689
29690 if (unaligned_p)
29691 {
29692 num = arm_const_inline_cost (SET, val);
29693 num += length / align + length % align;
29694 }
29695 else if (use_strd_p)
29696 {
29697 num = arm_const_double_inline_cost (val);
29698 num += (length >> 3) + leftover[length & 7];
29699 }
29700 else
29701 {
29702 num = arm_const_inline_cost (SET, val);
29703 num += (length >> 2) + leftover[length & 3];
29704 }
29705
29706 /* We may be able to combine last pair STRH/STRB into a single STR
29707 by shifting one byte back. */
29708 if (unaligned_access && length > 3 && (length & 3) == 3)
29709 num--;
29710
29711 return (num <= arm_block_set_max_insns ());
29712 }
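
/* Worked example (numbers chosen for illustration): for LENGTH = 15,
   ALIGN = 4, UNALIGNED_P and USE_STRD_P both false, the estimate is
   the cost of loading the constant plus (15 >> 2) = 3 word stores and
   leftover[3] = 2 stores for the trailing bytes; with unaligned access
   enabled the final STRH/STRB pair counts as a single STR, and the
   block is expanded inline only when the total does not exceed
   arm_block_set_max_insns ().  */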
29713
29714 /* Return TRUE if it's profitable to set block of memory for
29715 vectorized case. LENGTH is the number of bytes to set.
29716 ALIGN is the alignment of destination memory in bytes.
29717 MODE is the vector mode used to set the memory. */
29718 static bool
29719 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29720 unsigned HOST_WIDE_INT align,
29721 machine_mode mode)
29722 {
29723 int num;
29724 bool unaligned_p = ((align & 3) != 0);
29725 unsigned int nelt = GET_MODE_NUNITS (mode);
29726
29727 /* Instruction loading constant value. */
29728 num = 1;
29729 /* Instructions storing the memory. */
29730 num += (length + nelt - 1) / nelt;
29731 /* Instructions adjusting the address expression. We only need to
29732 adjust the address expression if it's 4-byte aligned and the
29733 leftover bytes can only be stored by a misaligned store instruction. */
29734 if (!unaligned_p && (length & 3) != 0)
29735 num++;
29736
29737 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29738 if (!unaligned_p && mode == V16QImode)
29739 num--;
29740
29741 return (num <= arm_block_set_max_insns ());
29742 }
29743
29744 /* Set a block of memory using vectorization instructions for the
29745 unaligned case. We fill the first LENGTH bytes of the memory
29746 area starting from DSTBASE with byte constant VALUE. ALIGN is
29747 the alignment requirement of memory. Return TRUE if succeeded. */
29748 static bool
29749 arm_block_set_unaligned_vect (rtx dstbase,
29750 unsigned HOST_WIDE_INT length,
29751 unsigned HOST_WIDE_INT value,
29752 unsigned HOST_WIDE_INT align)
29753 {
29754 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29755 rtx dst, mem;
29756 rtx val_elt, val_vec, reg;
29757 rtx rval[MAX_VECT_LEN];
29758 rtx (*gen_func) (rtx, rtx);
29759 machine_mode mode;
29760 unsigned HOST_WIDE_INT v = value;
29761 unsigned int offset = 0;
29762 gcc_assert ((align & 0x3) != 0);
29763 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29764 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29765 if (length >= nelt_v16)
29766 {
29767 mode = V16QImode;
29768 gen_func = gen_movmisalignv16qi;
29769 }
29770 else
29771 {
29772 mode = V8QImode;
29773 gen_func = gen_movmisalignv8qi;
29774 }
29775 nelt_mode = GET_MODE_NUNITS (mode);
29776 gcc_assert (length >= nelt_mode);
29777 /* Skip if it isn't profitable. */
29778 if (!arm_block_set_vect_profit_p (length, align, mode))
29779 return false;
29780
29781 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29782 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29783
29784 v = sext_hwi (v, BITS_PER_WORD);
29785 val_elt = GEN_INT (v);
29786 for (j = 0; j < nelt_mode; j++)
29787 rval[j] = val_elt;
29788
29789 reg = gen_reg_rtx (mode);
29790 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29791 /* Emit instruction loading the constant value. */
29792 emit_move_insn (reg, val_vec);
29793
29794 /* Handle nelt_mode bytes in a vector. */
29795 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29796 {
29797 emit_insn ((*gen_func) (mem, reg));
29798 if (i + 2 * nelt_mode <= length)
29799 {
29800 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29801 offset += nelt_mode;
29802 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29803 }
29804 }
29805
29806 /* If at least nelt_v8 bytes are left over, we must be in
29807 V16QI mode. */
29808 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29809
29810 /* Handle (8, 16) bytes leftover. */
29811 if (i + nelt_v8 < length)
29812 {
29813 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29814 offset += length - i;
29815 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29816
29817 /* We are shifting bytes back, set the alignment accordingly. */
29818 if ((length & 1) != 0 && align >= 2)
29819 set_mem_align (mem, BITS_PER_UNIT);
29820
29821 emit_insn (gen_movmisalignv16qi (mem, reg));
29822 }
29823 /* Handle (0, 8] bytes leftover. */
29824 else if (i < length && i + nelt_v8 >= length)
29825 {
29826 if (mode == V16QImode)
29827 reg = gen_lowpart (V8QImode, reg);
29828
29829 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29830 + (nelt_mode - nelt_v8))));
29831 offset += (length - i) + (nelt_mode - nelt_v8);
29832 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29833
29834 /* We are shifting bytes back, set the alignment accordingly. */
29835 if ((length & 1) != 0 && align >= 2)
29836 set_mem_align (mem, BITS_PER_UNIT);
29837
29838 emit_insn (gen_movmisalignv8qi (mem, reg));
29839 }
29840
29841 return true;
29842 }
29843
29844 /* Set a block of memory using vectorization instructions for the
29845 aligned case. We fill the first LENGTH bytes of the memory area
29846 starting from DSTBASE with byte constant VALUE. ALIGN is the
29847 alignment requirement of memory. Return TRUE if succeeded. */
29848 static bool
29849 arm_block_set_aligned_vect (rtx dstbase,
29850 unsigned HOST_WIDE_INT length,
29851 unsigned HOST_WIDE_INT value,
29852 unsigned HOST_WIDE_INT align)
29853 {
29854 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29855 rtx dst, addr, mem;
29856 rtx val_elt, val_vec, reg;
29857 rtx rval[MAX_VECT_LEN];
29858 machine_mode mode;
29859 unsigned HOST_WIDE_INT v = value;
29860 unsigned int offset = 0;
29861
29862 gcc_assert ((align & 0x3) == 0);
29863 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29864 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29865 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29866 mode = V16QImode;
29867 else
29868 mode = V8QImode;
29869
29870 nelt_mode = GET_MODE_NUNITS (mode);
29871 gcc_assert (length >= nelt_mode);
29872 /* Skip if it isn't profitable. */
29873 if (!arm_block_set_vect_profit_p (length, align, mode))
29874 return false;
29875
29876 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29877
29878 v = sext_hwi (v, BITS_PER_WORD);
29879 val_elt = GEN_INT (v);
29880 for (j = 0; j < nelt_mode; j++)
29881 rval[j] = val_elt;
29882
29883 reg = gen_reg_rtx (mode);
29884 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29885 /* Emit instruction loading the constant value. */
29886 emit_move_insn (reg, val_vec);
29887
29888 i = 0;
29889 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29890 if (mode == V16QImode)
29891 {
29892 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29893 emit_insn (gen_movmisalignv16qi (mem, reg));
29894 i += nelt_mode;
29895 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29896 if (i + nelt_v8 < length && i + nelt_v16 > length)
29897 {
29898 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29899 offset += length - nelt_mode;
29900 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29901 /* We are shifting bytes back, set the alignment accordingly. */
29902 if ((length & 0x3) == 0)
29903 set_mem_align (mem, BITS_PER_UNIT * 4);
29904 else if ((length & 0x1) == 0)
29905 set_mem_align (mem, BITS_PER_UNIT * 2);
29906 else
29907 set_mem_align (mem, BITS_PER_UNIT);
29908
29909 emit_insn (gen_movmisalignv16qi (mem, reg));
29910 return true;
29911 }
29912 /* Fall through for bytes leftover. */
29913 mode = V8QImode;
29914 nelt_mode = GET_MODE_NUNITS (mode);
29915 reg = gen_lowpart (V8QImode, reg);
29916 }
29917
29918 /* Handle 8 bytes in a vector. */
29919 for (; (i + nelt_mode <= length); i += nelt_mode)
29920 {
29921 addr = plus_constant (Pmode, dst, i);
29922 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29923 emit_move_insn (mem, reg);
29924 }
29925
29926 /* Handle single word leftover by shifting 4 bytes back. We can
29927 use aligned access for this case. */
29928 if (i + UNITS_PER_WORD == length)
29929 {
29930 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29931 offset += i - UNITS_PER_WORD;
29932 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29933 /* We are shifting 4 bytes back, set the alignment accordingly. */
29934 if (align > UNITS_PER_WORD)
29935 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29936
29937 emit_move_insn (mem, reg);
29938 }
29939 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29940 We have to use unaligned access for this case. */
29941 else if (i < length)
29942 {
29943 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29944 offset += length - nelt_mode;
29945 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29946 /* We are shifting bytes back, set the alignment accordingly. */
29947 if ((length & 1) == 0)
29948 set_mem_align (mem, BITS_PER_UNIT * 2);
29949 else
29950 set_mem_align (mem, BITS_PER_UNIT);
29951
29952 emit_insn (gen_movmisalignv8qi (mem, reg));
29953 }
29954
29955 return true;
29956 }
29957
29958 /* Set a block of memory using plain strh/strb instructions, only
29959 using instructions allowed by ALIGN on the processor. We fill the
29960 first LENGTH bytes of the memory area starting from DSTBASE
29961 with byte constant VALUE. ALIGN is the alignment requirement
29962 of memory. */
29963 static bool
29964 arm_block_set_unaligned_non_vect (rtx dstbase,
29965 unsigned HOST_WIDE_INT length,
29966 unsigned HOST_WIDE_INT value,
29967 unsigned HOST_WIDE_INT align)
29968 {
29969 unsigned int i;
29970 rtx dst, addr, mem;
29971 rtx val_exp, val_reg, reg;
29972 machine_mode mode;
29973 HOST_WIDE_INT v = value;
29974
29975 gcc_assert (align == 1 || align == 2);
29976
29977 if (align == 2)
29978 v |= (value << BITS_PER_UNIT);
29979
29980 v = sext_hwi (v, BITS_PER_WORD);
29981 val_exp = GEN_INT (v);
29982 /* Skip if it isn't profitable. */
29983 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29984 align, true, false))
29985 return false;
29986
29987 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29988 mode = (align == 2 ? HImode : QImode);
29989 val_reg = force_reg (SImode, val_exp);
29990 reg = gen_lowpart (mode, val_reg);
29991
29992 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29993 {
29994 addr = plus_constant (Pmode, dst, i);
29995 mem = adjust_automodify_address (dstbase, mode, addr, i);
29996 emit_move_insn (mem, reg);
29997 }
29998
29999 /* Handle single byte leftover. */
30000 if (i + 1 == length)
30001 {
30002 reg = gen_lowpart (QImode, val_reg);
30003 addr = plus_constant (Pmode, dst, i);
30004 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30005 emit_move_insn (mem, reg);
30006 i++;
30007 }
30008
30009 gcc_assert (i == length);
30010 return true;
30011 }
30012
30013 /* Set a block of memory using plain strd/str/strh/strb instructions,
30014 to permit unaligned copies on processors which support unaligned
30015 semantics for those instructions. We fill the first LENGTH bytes
30016 of the memory area starting from DSTBASE with byte constant VALUE.
30017 ALIGN is the alignment requirement of memory. */
30018 static bool
30019 arm_block_set_aligned_non_vect (rtx dstbase,
30020 unsigned HOST_WIDE_INT length,
30021 unsigned HOST_WIDE_INT value,
30022 unsigned HOST_WIDE_INT align)
30023 {
30024 unsigned int i;
30025 rtx dst, addr, mem;
30026 rtx val_exp, val_reg, reg;
30027 unsigned HOST_WIDE_INT v;
30028 bool use_strd_p;
30029
30030 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30031 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30032
30033 v = (value | (value << 8) | (value << 16) | (value << 24));
30034 if (length < UNITS_PER_WORD)
30035 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30036
30037 if (use_strd_p)
30038 v |= (v << BITS_PER_WORD);
30039 else
30040 v = sext_hwi (v, BITS_PER_WORD);
30041
30042 val_exp = GEN_INT (v);
30043 /* Skip if it isn't profitable. */
30044 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30045 align, false, use_strd_p))
30046 {
30047 if (!use_strd_p)
30048 return false;
30049
30050 /* Try without strd. */
30051 v = (v >> BITS_PER_WORD);
30052 v = sext_hwi (v, BITS_PER_WORD);
30053 val_exp = GEN_INT (v);
30054 use_strd_p = false;
30055 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30056 align, false, use_strd_p))
30057 return false;
30058 }
30059
30060 i = 0;
30061 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30062 /* Handle double words using strd if possible. */
30063 if (use_strd_p)
30064 {
30065 val_reg = force_reg (DImode, val_exp);
30066 reg = val_reg;
30067 for (; (i + 8 <= length); i += 8)
30068 {
30069 addr = plus_constant (Pmode, dst, i);
30070 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30071 emit_move_insn (mem, reg);
30072 }
30073 }
30074 else
30075 val_reg = force_reg (SImode, val_exp);
30076
30077 /* Handle words. */
30078 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30079 for (; (i + 4 <= length); i += 4)
30080 {
30081 addr = plus_constant (Pmode, dst, i);
30082 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30083 if ((align & 3) == 0)
30084 emit_move_insn (mem, reg);
30085 else
30086 emit_insn (gen_unaligned_storesi (mem, reg));
30087 }
30088
30089 /* Merge last pair of STRH and STRB into a STR if possible. */
30090 if (unaligned_access && i > 0 && (i + 3) == length)
30091 {
30092 addr = plus_constant (Pmode, dst, i - 1);
30093 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30094 /* We are shifting one byte back, set the alignment accordingly. */
30095 if ((align & 1) == 0)
30096 set_mem_align (mem, BITS_PER_UNIT);
30097
30098 /* Most likely this is an unaligned access, and we can't tell at
30099 compilation time. */
30100 emit_insn (gen_unaligned_storesi (mem, reg));
30101 return true;
30102 }
30103
30104 /* Handle half word leftover. */
30105 if (i + 2 <= length)
30106 {
30107 reg = gen_lowpart (HImode, val_reg);
30108 addr = plus_constant (Pmode, dst, i);
30109 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30110 if ((align & 1) == 0)
30111 emit_move_insn (mem, reg);
30112 else
30113 emit_insn (gen_unaligned_storehi (mem, reg));
30114
30115 i += 2;
30116 }
30117
30118 /* Handle single byte leftover. */
30119 if (i + 1 == length)
30120 {
30121 reg = gen_lowpart (QImode, val_reg);
30122 addr = plus_constant (Pmode, dst, i);
30123 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30124 emit_move_insn (mem, reg);
30125 }
30126
30127 return true;
30128 }
30129
30130 /* Set a block of memory using vectorization instructions for both
30131 aligned and unaligned cases. We fill the first LENGTH bytes of
30132 the memory area starting from DSTBASE with byte constant VALUE.
30133 ALIGN is the alignment requirement of memory. */
30134 static bool
30135 arm_block_set_vect (rtx dstbase,
30136 unsigned HOST_WIDE_INT length,
30137 unsigned HOST_WIDE_INT value,
30138 unsigned HOST_WIDE_INT align)
30139 {
30140 /* Check whether we need to use unaligned store instruction. */
30141 if (((align & 3) != 0 || (length & 3) != 0)
30142 /* Check whether unaligned store instruction is available. */
30143 && (!unaligned_access || BYTES_BIG_ENDIAN))
30144 return false;
30145
30146 if ((align & 3) == 0)
30147 return arm_block_set_aligned_vect (dstbase, length, value, align);
30148 else
30149 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30150 }
30151
30152 /* Expand string store operation. Firstly we try to do that by using
30153 vectorization instructions, then try with ARM unaligned access and
30154 double-word store if profitable. OPERANDS[0] is the destination,
30155 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30156 initialize the memory, OPERANDS[3] is the known alignment of the
30157 destination. */
30158 bool
30159 arm_gen_setmem (rtx *operands)
30160 {
30161 rtx dstbase = operands[0];
30162 unsigned HOST_WIDE_INT length;
30163 unsigned HOST_WIDE_INT value;
30164 unsigned HOST_WIDE_INT align;
30165
30166 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30167 return false;
30168
30169 length = UINTVAL (operands[1]);
30170 if (length > 64)
30171 return false;
30172
30173 value = (UINTVAL (operands[2]) & 0xFF);
30174 align = UINTVAL (operands[3]);
30175 if (TARGET_NEON && length >= 8
30176 && current_tune->string_ops_prefer_neon
30177 && arm_block_set_vect (dstbase, length, value, align))
30178 return true;
30179
30180 if (!unaligned_access && (align & 3) != 0)
30181 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30182
30183 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30184 }
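
/* Sketch of a typical expansion (assuming a little-endian target with
   NEON, unaligned access and string_ops_prefer_neon): a 16-byte
   memset of value 0xAB on a 4-byte-aligned destination arrives here
   with OPERANDS[1] = 16, OPERANDS[2] = 0xAB and OPERANDS[3] = 4, and
   is handled by arm_block_set_aligned_vect as one V16QImode constant
   load plus a single misaligned vector store; lengths above 64 bytes
   always return false so that expansion is left to the generic code.  */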
30185
30186
30187 static bool
30188 arm_macro_fusion_p (void)
30189 {
30190 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30191 }
30192
30193 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30194 for MOVW / MOVT macro fusion. */
30195
30196 static bool
30197 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30198 {
30199 /* We are trying to fuse
30200 movw imm / movt imm
30201 instructions as a group that gets scheduled together. */
30202
30203 rtx set_dest = SET_DEST (curr_set);
30204
30205 if (GET_MODE (set_dest) != SImode)
30206 return false;
30207
30208 /* We are trying to match:
30209 prev (movw) == (set (reg r0) (const_int imm16))
30210 curr (movt) == (set (zero_extract (reg r0)
30211 (const_int 16)
30212 (const_int 16))
30213 (const_int imm16_1))
30214 or
30215 prev (movw) == (set (reg r1)
30216 (high (symbol_ref ("SYM"))))
30217 curr (movt) == (set (reg r0)
30218 (lo_sum (reg r1)
30219 (symbol_ref ("SYM")))) */
30220
30221 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30222 {
30223 if (CONST_INT_P (SET_SRC (curr_set))
30224 && CONST_INT_P (SET_SRC (prev_set))
30225 && REG_P (XEXP (set_dest, 0))
30226 && REG_P (SET_DEST (prev_set))
30227 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30228 return true;
30229
30230 }
30231 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30232 && REG_P (SET_DEST (curr_set))
30233 && REG_P (SET_DEST (prev_set))
30234 && GET_CODE (SET_SRC (prev_set)) == HIGH
30235 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30236 return true;
30237
30238 return false;
30239 }
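
/* For example, the pair
       movw r0, #:lower16:sym
       movt r0, #:upper16:sym
   matches the second (HIGH / LO_SUM) form above, and an immediate
   pair such as movw r0, #0x1234 / movt r0, #0x5678 matches the
   ZERO_EXTRACT form; both are kept together by the scheduler when
   FUSE_MOVW_MOVT is enabled.  */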
30240
30241 static bool
30242 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30243 {
30244 rtx prev_set = single_set (prev);
30245 rtx curr_set = single_set (curr);
30246
30247 if (!prev_set
30248 || !curr_set)
30249 return false;
30250
30251 if (any_condjump_p (curr))
30252 return false;
30253
30254 if (!arm_macro_fusion_p ())
30255 return false;
30256
30257 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30258 && aarch_crypto_can_dual_issue (prev, curr))
30259 return true;
30260
30261 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30262 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30263 return true;
30264
30265 return false;
30266 }
30267
30268 /* Return true iff the instruction fusion described by OP is enabled. */
30269 bool
30270 arm_fusion_enabled_p (tune_params::fuse_ops op)
30271 {
30272 return current_tune->fusible_ops & op;
30273 }
30274
30275 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30276 scheduled for speculative execution. Reject the long-running division
30277 and square-root instructions. */
30278
30279 static bool
30280 arm_sched_can_speculate_insn (rtx_insn *insn)
30281 {
30282 switch (get_attr_type (insn))
30283 {
30284 case TYPE_SDIV:
30285 case TYPE_UDIV:
30286 case TYPE_FDIVS:
30287 case TYPE_FDIVD:
30288 case TYPE_FSQRTS:
30289 case TYPE_FSQRTD:
30290 case TYPE_NEON_FP_SQRT_S:
30291 case TYPE_NEON_FP_SQRT_D:
30292 case TYPE_NEON_FP_SQRT_S_Q:
30293 case TYPE_NEON_FP_SQRT_D_Q:
30294 case TYPE_NEON_FP_DIV_S:
30295 case TYPE_NEON_FP_DIV_D:
30296 case TYPE_NEON_FP_DIV_S_Q:
30297 case TYPE_NEON_FP_DIV_D_Q:
30298 return false;
30299 default:
30300 return true;
30301 }
30302 }
30303
30304 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30305
30306 static unsigned HOST_WIDE_INT
30307 arm_asan_shadow_offset (void)
30308 {
30309 return HOST_WIDE_INT_1U << 29;
30310 }
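
/* With the default shadow scale of 3 this means the shadow byte for
   address A lives at (A >> 3) + 0x20000000.  */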
30311
30312
30313 /* This is a temporary fix for PR60655. Ideally we need
30314 to handle most of these cases in the generic part but
30315 currently we reject minus (..) (sym_ref). We try to
30316 ameliorate the case with minus (sym_ref1) (sym_ref2)
30317 where they are in the same section. */
30318
30319 static bool
30320 arm_const_not_ok_for_debug_p (rtx p)
30321 {
30322 tree decl_op0 = NULL;
30323 tree decl_op1 = NULL;
30324
30325 if (GET_CODE (p) == MINUS)
30326 {
30327 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30328 {
30329 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30330 if (decl_op1
30331 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30332 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30333 {
30334 if ((VAR_P (decl_op1)
30335 || TREE_CODE (decl_op1) == CONST_DECL)
30336 && (VAR_P (decl_op0)
30337 || TREE_CODE (decl_op0) == CONST_DECL))
30338 return (get_variable_section (decl_op1, false)
30339 != get_variable_section (decl_op0, false));
30340
30341 if (TREE_CODE (decl_op1) == LABEL_DECL
30342 && TREE_CODE (decl_op0) == LABEL_DECL)
30343 return (DECL_CONTEXT (decl_op1)
30344 != DECL_CONTEXT (decl_op0));
30345 }
30346
30347 return true;
30348 }
30349 }
30350
30351 return false;
30352 }
30353
30354 /* Return TRUE if X is a reference to a value in a constant pool. */
30355 extern bool
30356 arm_is_constant_pool_ref (rtx x)
30357 {
30358 return (MEM_P (x)
30359 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30360 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30361 }
30362
30363 /* Remember the last target of arm_set_current_function. */
30364 static GTY(()) tree arm_previous_fndecl;
30365
30366 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30367
30368 void
30369 save_restore_target_globals (tree new_tree)
30370 {
30371 /* If we have a previous state, use it. */
30372 if (TREE_TARGET_GLOBALS (new_tree))
30373 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30374 else if (new_tree == target_option_default_node)
30375 restore_target_globals (&default_target_globals);
30376 else
30377 {
30378 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30379 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30380 }
30381
30382 arm_option_params_internal ();
30383 }
30384
30385 /* Invalidate arm_previous_fndecl. */
30386
30387 void
30388 arm_reset_previous_fndecl (void)
30389 {
30390 arm_previous_fndecl = NULL_TREE;
30391 }
30392
30393 /* Establish appropriate back-end context for processing the function
30394 FNDECL. The argument might be NULL to indicate processing at top
30395 level, outside of any function scope. */
30396
30397 static void
30398 arm_set_current_function (tree fndecl)
30399 {
30400 if (!fndecl || fndecl == arm_previous_fndecl)
30401 return;
30402
30403 tree old_tree = (arm_previous_fndecl
30404 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30405 : NULL_TREE);
30406
30407 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30408
30409 /* If current function has no attributes but previous one did,
30410 use the default node. */
30411 if (! new_tree && old_tree)
30412 new_tree = target_option_default_node;
30413
30414 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30415 the default have been handled by save_restore_target_globals from
30416 arm_pragma_target_parse. */
30417 if (old_tree == new_tree)
30418 return;
30419
30420 arm_previous_fndecl = fndecl;
30421
30422 /* First set the target options. */
30423 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30424
30425 save_restore_target_globals (new_tree);
30426 }
30427
30428 /* Implement TARGET_OPTION_PRINT. */
30429
30430 static void
30431 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30432 {
30433 int flags = ptr->x_target_flags;
30434 const char *fpu_name;
30435
30436 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30437 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30438
30439 fprintf (file, "%*sselected isa %s\n", indent, "",
30440 TARGET_THUMB2_P (flags) ? "thumb2" :
30441 TARGET_THUMB_P (flags) ? "thumb1" :
30442 "arm");
30443
30444 if (ptr->x_arm_arch_string)
30445 fprintf (file, "%*sselected architecture %s\n", indent, "",
30446 ptr->x_arm_arch_string);
30447
30448 if (ptr->x_arm_cpu_string)
30449 fprintf (file, "%*sselected CPU %s\n", indent, "",
30450 ptr->x_arm_cpu_string);
30451
30452 if (ptr->x_arm_tune_string)
30453 fprintf (file, "%*sselected tune %s\n", indent, "",
30454 ptr->x_arm_tune_string);
30455
30456 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30457 }
30458
30459 /* Hook to determine if one function can safely inline another. */
30460
30461 static bool
30462 arm_can_inline_p (tree caller, tree callee)
30463 {
30464 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30465 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30466 bool can_inline = true;
30467
30468 struct cl_target_option *caller_opts
30469 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30470 : target_option_default_node);
30471
30472 struct cl_target_option *callee_opts
30473 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30474 : target_option_default_node);
30475
30476 if (callee_opts == caller_opts)
30477 return true;
30478
30479 /* Callee's ISA features should be a subset of the caller's. */
30480 struct arm_build_target caller_target;
30481 struct arm_build_target callee_target;
30482 caller_target.isa = sbitmap_alloc (isa_num_bits);
30483 callee_target.isa = sbitmap_alloc (isa_num_bits);
30484
30485 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30486 false);
30487 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30488 false);
30489 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30490 can_inline = false;
30491
30492 sbitmap_free (caller_target.isa);
30493 sbitmap_free (callee_target.isa);
30494
30495 /* It is OK to inline between different modes.
30496 A function with mode-specific instructions, e.g. using inline asm,
30497 must be explicitly protected with noinline. */
30498 return can_inline;
30499 }
30500
30501 /* Hook to fix function's alignment affected by target attribute. */
30502
30503 static void
30504 arm_relayout_function (tree fndecl)
30505 {
30506 if (DECL_USER_ALIGN (fndecl))
30507 return;
30508
30509 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30510
30511 if (!callee_tree)
30512 callee_tree = target_option_default_node;
30513
30514 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30515 SET_DECL_ALIGN
30516 (fndecl,
30517 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30518 }
30519
30520 /* Inner function to process the attribute ((target (...))): take an argument
30521 and set the current options from it. If the argument is a list, recursively
30522 process each element of the list. */
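/* For example (illustrative declaration), something like
     int foo (void) __attribute__ ((target ("thumb,fpu=vfpv4")));
   reaches this function with ARGS holding the string "thumb,fpu=vfpv4",
   which is then split on commas below.  */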
30523
30524 static bool
30525 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30526 {
30527 if (TREE_CODE (args) == TREE_LIST)
30528 {
30529 bool ret = true;
30530
30531 for (; args; args = TREE_CHAIN (args))
30532 if (TREE_VALUE (args)
30533 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30534 ret = false;
30535 return ret;
30536 }
30537
30538 else if (TREE_CODE (args) != STRING_CST)
30539 {
30540 error ("attribute %<target%> argument not a string");
30541 return false;
30542 }
30543
30544 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30545 char *q;
30546
30547 while ((q = strtok (argstr, ",")) != NULL)
30548 {
30549 while (ISSPACE (*q)) ++q;
30550
30551 argstr = NULL;
30552 if (!strncmp (q, "thumb", 5))
30553 opts->x_target_flags |= MASK_THUMB;
30554
30555 else if (!strncmp (q, "arm", 3))
30556 opts->x_target_flags &= ~MASK_THUMB;
30557
30558 else if (!strncmp (q, "fpu=", 4))
30559 {
30560 int fpu_index;
30561 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30562 &fpu_index, CL_TARGET))
30563 {
30564 error ("invalid fpu for attribute(target(\"%s\"))", q);
30565 return false;
30566 }
30567 if (fpu_index == TARGET_FPU_auto)
30568 {
30569 /* This doesn't really make sense until we support
30570 general dynamic selection of the architecture and all
30571 sub-features. */
30572 sorry ("auto fpu selection not currently permitted here");
30573 return false;
30574 }
30575 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30576 }
30577 else
30578 {
30579 error ("attribute(target(\"%s\")) is unknown", q);
30580 return false;
30581 }
30582 }
30583
30584 return true;
30585 }
30586
30587 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30588
30589 tree
30590 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30591 struct gcc_options *opts_set)
30592 {
30593 struct cl_target_option cl_opts;
30594
30595 if (!arm_valid_target_attribute_rec (args, opts))
30596 return NULL_TREE;
30597
30598 cl_target_option_save (&cl_opts, opts);
30599 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30600 arm_option_check_internal (opts);
30601 /* Do any overrides, such as global options arch=xxx. */
30602 arm_option_override_internal (opts, opts_set);
30603
30604 return build_target_option_node (opts);
30605 }
30606
30607 static void
30608 add_attribute (const char * mode, tree *attributes)
30609 {
30610 size_t len = strlen (mode);
30611 tree value = build_string (len, mode);
30612
30613 TREE_TYPE (value) = build_array_type (char_type_node,
30614 build_index_type (size_int (len)));
30615
30616 *attributes = tree_cons (get_identifier ("target"),
30617 build_tree_list (NULL_TREE, value),
30618 *attributes);
30619 }
30620
30621 /* For testing. Insert thumb or arm modes alternately on functions. */
30622
30623 static void
30624 arm_insert_attributes (tree fndecl, tree * attributes)
30625 {
30626 const char *mode;
30627
30628 if (! TARGET_FLIP_THUMB)
30629 return;
30630
30631 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30632 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30633 return;
30634
30635 /* Nested definitions must inherit mode. */
30636 if (current_function_decl)
30637 {
30638 mode = TARGET_THUMB ? "thumb" : "arm";
30639 add_attribute (mode, attributes);
30640 return;
30641 }
30642
30643 /* If there is already a setting don't change it. */
30644 if (lookup_attribute ("target", *attributes) != NULL)
30645 return;
30646
30647 mode = thumb_flipper ? "thumb" : "arm";
30648 add_attribute (mode, attributes);
30649
30650 thumb_flipper = !thumb_flipper;
30651 }
30652
30653 /* Hook to validate attribute((target("string"))). */
30654
30655 static bool
30656 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30657 tree args, int ARG_UNUSED (flags))
30658 {
30659 bool ret = true;
30660 struct gcc_options func_options;
30661 tree cur_tree, new_optimize;
30662 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30663
30664 /* Get the optimization options of the current function. */
30665 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30666
30667 /* If the function changed the optimization levels as well as setting target
30668 options, start with the optimizations specified. */
30669 if (!func_optimize)
30670 func_optimize = optimization_default_node;
30671
30672 /* Init func_options. */
30673 memset (&func_options, 0, sizeof (func_options));
30674 init_options_struct (&func_options, NULL);
30675 lang_hooks.init_options_struct (&func_options);
30676
30677 /* Initialize func_options to the defaults. */
30678 cl_optimization_restore (&func_options,
30679 TREE_OPTIMIZATION (func_optimize));
30680
30681 cl_target_option_restore (&func_options,
30682 TREE_TARGET_OPTION (target_option_default_node));
30683
30684 /* Set func_options flags with new target mode. */
30685 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30686 &global_options_set);
30687
30688 if (cur_tree == NULL_TREE)
30689 ret = false;
30690
30691 new_optimize = build_optimization_node (&func_options);
30692
30693 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30694
30695 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30696
30697 finalize_options_struct (&func_options);
30698
30699 return ret;
30700 }
30701
30702 /* Match an ISA feature bitmap to a named FPU. We always use the
30703 first entry that exactly matches the feature set, so that we
30704 effectively canonicalize the FPU name for the assembler. */
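/* For instance (illustrative), an ISA whose FPU-related bits exactly match
   the all_fpus entry for vfpv3-d16 is reported as "vfpv3-d16", and an ISA
   with no FPU bits at all is reported as "softvfp".  */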
30705 static const char*
30706 arm_identify_fpu_from_isa (sbitmap isa)
30707 {
30708 auto_sbitmap fpubits (isa_num_bits);
30709 auto_sbitmap cand_fpubits (isa_num_bits);
30710
30711 bitmap_and (fpubits, isa, isa_all_fpubits);
30712
30713 /* If there are no ISA feature bits relating to the FPU, we must be
30714 doing soft-float. */
30715 if (bitmap_empty_p (fpubits))
30716 return "softvfp";
30717
30718 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30719 {
30720 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30721 if (bitmap_equal_p (fpubits, cand_fpubits))
30722 return all_fpus[i].name;
30723 }
30724 /* We must find an entry, or things have gone wrong. */
30725 gcc_unreachable ();
30726 }
30727
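/* Output the assembler directives that introduce the function NAME/DECL on
   STREAM: the unified-syntax marker, the ARM/Thumb mode directives and the
   .fpu directive.  For example (hypothetical target settings), a Thumb-2
   function built with a VFPv4 FPU would be preceded by something like
       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv4  */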
30728 void
30729 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30730 {
30731
30732 fprintf (stream, "\t.syntax unified\n");
30733
30734 if (TARGET_THUMB)
30735 {
30736 if (is_called_in_ARM_mode (decl)
30737 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30738 && cfun->is_thunk))
30739 fprintf (stream, "\t.code 32\n");
30740 else if (TARGET_THUMB1)
30741 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30742 else
30743 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30744 }
30745 else
30746 fprintf (stream, "\t.arm\n");
30747
30748 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30749 (TARGET_SOFT_FLOAT
30750 ? "softvfp"
30751 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30752
30753 if (TARGET_POKE_FUNCTION_NAME)
30754 arm_poke_function_name (stream, (const char *) name);
30755 }
30756
30757 /* If MEM has an address of the form [base+offset], extract the two
30758 parts into BASE and OFFSET and return true; otherwise clear BASE
30759 and OFFSET and return false. */
30760
30761 static bool
30762 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30763 {
30764 rtx addr;
30765
30766 gcc_assert (MEM_P (mem));
30767
30768 addr = XEXP (mem, 0);
30769
30770 /* Strip off const from addresses like (const (addr)). */
30771 if (GET_CODE (addr) == CONST)
30772 addr = XEXP (addr, 0);
30773
30774 if (GET_CODE (addr) == REG)
30775 {
30776 *base = addr;
30777 *offset = const0_rtx;
30778 return true;
30779 }
30780
30781 if (GET_CODE (addr) == PLUS
30782 && GET_CODE (XEXP (addr, 0)) == REG
30783 && CONST_INT_P (XEXP (addr, 1)))
30784 {
30785 *base = XEXP (addr, 0);
30786 *offset = XEXP (addr, 1);
30787 return true;
30788 }
30789
30790 *base = NULL_RTX;
30791 *offset = NULL_RTX;
30792
30793 return false;
30794 }
30795
30796 /* If INSN is a load or store whose address has the form [base+offset],
30797 extract the two parts into BASE and OFFSET and set IS_LOAD to TRUE
30798 if it is a load. Return TRUE if INSN is such an instruction,
30799 otherwise return FALSE. */
30800
30801 static bool
30802 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30803 {
30804 rtx x, dest, src;
30805
30806 gcc_assert (INSN_P (insn));
30807 x = PATTERN (insn);
30808 if (GET_CODE (x) != SET)
30809 return false;
30810
30811 src = SET_SRC (x);
30812 dest = SET_DEST (x);
30813 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30814 {
30815 *is_load = false;
30816 extract_base_offset_in_addr (dest, base, offset);
30817 }
30818 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30819 {
30820 *is_load = true;
30821 extract_base_offset_in_addr (src, base, offset);
30822 }
30823 else
30824 return false;
30825
30826 return (*base != NULL_RTX && *offset != NULL_RTX);
30827 }
30828
30829 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30830
30831 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30832 and PRI are only calculated for these instructions. For all other
30833 instructions, FUSION_PRI and PRI are simply set to MAX_PRI. In the future,
30834 fusion of other kinds of instructions can be supported by returning different priorities.
30835
30836 It's important that irrelevant instructions get the largest FUSION_PRI. */
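/* As a worked example of the computation below (illustrative register and
   offset values): with MAX_PRI == 100, a load from [r1, #4] gets
   FUSION_PRI == 98 and PRI == 49 - (1 << 20) - 4, while a load from
   [r1, #8] gets FUSION_PRI == 98 and PRI == 49 - (1 << 20) - 8, so the two
   share a fusion priority and are ordered by base register and offset.  */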
30837
30838 static void
30839 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30840 int *fusion_pri, int *pri)
30841 {
30842 int tmp, off_val;
30843 bool is_load;
30844 rtx base, offset;
30845
30846 gcc_assert (INSN_P (insn));
30847
30848 tmp = max_pri - 1;
30849 if (!fusion_load_store (insn, &base, &offset, &is_load))
30850 {
30851 *pri = tmp;
30852 *fusion_pri = tmp;
30853 return;
30854 }
30855
30856 /* Load goes first. */
30857 if (is_load)
30858 *fusion_pri = tmp - 1;
30859 else
30860 *fusion_pri = tmp - 2;
30861
30862 tmp /= 2;
30863
30864 /* INSN with smaller base register goes first. */
30865 tmp -= ((REGNO (base) & 0xff) << 20);
30866
30867 /* INSN with smaller offset goes first. */
30868 off_val = (int)(INTVAL (offset));
30869 if (off_val >= 0)
30870 tmp -= (off_val & 0xfffff);
30871 else
30872 tmp += ((- off_val) & 0xfffff);
30873
30874 *pri = tmp;
30875 return;
30876 }
30877
30878
30879 /* Construct and return a PARALLEL RTX vector with elements numbering the
30880 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30881 the vector - from the perspective of the architecture. This does not
30882 line up with GCC's perspective on lane numbers, so we end up with
30883 different masks depending on our target endian-ness. The diagram
30884 below may help. We must draw the distinction when building masks
30885 which select one half of the vector. An instruction selecting
30886 architectural low-lanes for a big-endian target, must be described using
30887 a mask selecting GCC high-lanes.
30888
30889 Big-Endian Little-Endian
30890
30891 GCC 0 1 2 3 3 2 1 0
30892 | x | x | x | x | | x | x | x | x |
30893 Architecture 3 2 1 0 3 2 1 0
30894
30895 Low Mask: { 2, 3 } { 0, 1 }
30896 High Mask: { 0, 1 } { 2, 3 }
30897 */
30898
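/* A concrete instance (little-endian, V4SImode, HIGH == true): the code
   below produces (parallel [(const_int 2) (const_int 3)]), i.e. the GCC
   high lanes, which the table above shows are also the architectural high
   lanes on a little-endian target.  */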
30899 rtx
30900 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30901 {
30902 int nunits = GET_MODE_NUNITS (mode);
30903 rtvec v = rtvec_alloc (nunits / 2);
30904 int high_base = nunits / 2;
30905 int low_base = 0;
30906 int base;
30907 rtx t1;
30908 int i;
30909
30910 if (BYTES_BIG_ENDIAN)
30911 base = high ? low_base : high_base;
30912 else
30913 base = high ? high_base : low_base;
30914
30915 for (i = 0; i < nunits / 2; i++)
30916 RTVEC_ELT (v, i) = GEN_INT (base + i);
30917
30918 t1 = gen_rtx_PARALLEL (mode, v);
30919 return t1;
30920 }
30921
30922 /* Check OP for validity as a PARALLEL RTX vector with elements
30923 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30924 from the perspective of the architecture. See the diagram above
30925 arm_simd_vect_par_cnst_half for more details. */
30926
30927 bool
30928 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30929 bool high)
30930 {
30931 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30932 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30933 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30934 int i = 0;
30935
30936 if (!VECTOR_MODE_P (mode))
30937 return false;
30938
30939 if (count_op != count_ideal)
30940 return false;
30941
30942 for (i = 0; i < count_ideal; i++)
30943 {
30944 rtx elt_op = XVECEXP (op, 0, i);
30945 rtx elt_ideal = XVECEXP (ideal, 0, i);
30946
30947 if (!CONST_INT_P (elt_op)
30948 || INTVAL (elt_ideal) != INTVAL (elt_op))
30949 return false;
30950 }
30951 return true;
30952 }
30953
30954 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30955 in Thumb1. */
30956 static bool
30957 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30958 const_tree)
30959 {
30960 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30961 if (vcall_offset && TARGET_THUMB1)
30962 return false;
30963
30964 /* Otherwise ok. */
30965 return true;
30966 }
30967
30968 /* Generate RTL for a conditional branch with rtx comparison CODE in
30969 mode CC_MODE. The destination of the unlikely conditional branch
30970 is LABEL_REF. */
30971
30972 void
30973 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30974 rtx label_ref)
30975 {
30976 rtx x;
30977 x = gen_rtx_fmt_ee (code, VOIDmode,
30978 gen_rtx_REG (cc_mode, CC_REGNUM),
30979 const0_rtx);
30980
30981 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30982 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30983 pc_rtx);
30984 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30985 }
30986
30987 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30988
30989 For pure-code sections there is no letter code for this attribute, so
30990 output all the section flags numerically when this is needed. */
30991
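/* The numeric values used below correspond to the standard ELF section
   flags: 0x1 SHF_WRITE, 0x2 SHF_ALLOC, 0x4 SHF_EXECINSTR, 0x10 SHF_MERGE,
   0x20 SHF_STRINGS, 0x200 SHF_GROUP, 0x400 SHF_TLS, 0x80000000 SHF_EXCLUDE,
   while 0x20000000 is the ARM-specific SHF_ARM_PURECODE flag.  */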
30992 static bool
30993 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30994 {
30995
30996 if (flags & SECTION_ARM_PURECODE)
30997 {
30998 *num = 0x20000000;
30999
31000 if (!(flags & SECTION_DEBUG))
31001 *num |= 0x2;
31002 if (flags & SECTION_EXCLUDE)
31003 *num |= 0x80000000;
31004 if (flags & SECTION_WRITE)
31005 *num |= 0x1;
31006 if (flags & SECTION_CODE)
31007 *num |= 0x4;
31008 if (flags & SECTION_MERGE)
31009 *num |= 0x10;
31010 if (flags & SECTION_STRINGS)
31011 *num |= 0x20;
31012 if (flags & SECTION_TLS)
31013 *num |= 0x400;
31014 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31015 *num |= 0x200;
31016
31017 return true;
31018 }
31019
31020 return false;
31021 }
31022
31023 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31024
31025 If pure-code is passed as an option, make sure all functions are in
31026 sections that have the SHF_ARM_PURECODE attribute. */
31027
31028 static section *
31029 arm_function_section (tree decl, enum node_frequency freq,
31030 bool startup, bool exit)
31031 {
31032 const char * section_name;
31033 section * sec;
31034
31035 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31036 return default_function_section (decl, freq, startup, exit);
31037
31038 if (!target_pure_code)
31039 return default_function_section (decl, freq, startup, exit);
31040
31041
31042 section_name = DECL_SECTION_NAME (decl);
31043
31044 /* If a function is not in a named section then it falls under the 'default'
31045 text section, also known as '.text'. We can preserve previous behavior as
31046 the default text section already has the SHF_ARM_PURECODE section
31047 attribute. */
31048 if (!section_name)
31049 {
31050 section *default_sec = default_function_section (decl, freq, startup,
31051 exit);
31052
31053 /* If default_sec is not null, then it must be a special section such as
31054 .text.startup. We set the pure-code attribute and return the
31055 same section to preserve existing behavior. */
31056 if (default_sec)
31057 default_sec->common.flags |= SECTION_ARM_PURECODE;
31058 return default_sec;
31059 }
31060
31061 /* Otherwise look whether a section has already been created with
31062 'section_name'. */
31063 sec = get_named_section (decl, section_name, 0);
31064 if (!sec)
31065 /* If that is not the case, passing NULL as the section's name to
31066 'get_named_section' will create a section with the declaration's
31067 section name. */
31068 sec = get_named_section (decl, NULL, 0);
31069
31070 /* Set the SHF_ARM_PURECODE attribute. */
31071 sec->common.flags |= SECTION_ARM_PURECODE;
31072
31073 return sec;
31074 }
31075
31076 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31077
31078 If DECL is a function declaration and pure-code is passed as an option
31079 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31080 section's name and RELOC indicates whether the declaration's initializer may
31081 contain runtime relocations. */
31082
31083 static unsigned int
31084 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31085 {
31086 unsigned int flags = default_section_type_flags (decl, name, reloc);
31087
31088 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31089 flags |= SECTION_ARM_PURECODE;
31090
31091 return flags;
31092 }
31093
31094 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31095
31096 static void
31097 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31098 rtx op0, rtx op1,
31099 rtx *quot_p, rtx *rem_p)
31100 {
31101 if (mode == SImode)
31102 gcc_assert (!TARGET_IDIV);
31103
31104 scalar_int_mode libval_mode
31105 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31106
31107 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31108 libval_mode,
31109 op0, GET_MODE (op0),
31110 op1, GET_MODE (op1));
31111
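/* Per the AEABI, the divmod helpers return quotient and remainder packed
   into a value of twice the operand width (for __aeabi_idivmod, quotient
   in r0 and remainder in r1); the subregs below split that value back
   into its two halves.  */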
31112 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31113 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31114 GET_MODE_SIZE (mode));
31115
31116 gcc_assert (quotient);
31117 gcc_assert (remainder);
31118
31119 *quot_p = quotient;
31120 *rem_p = remainder;
31121 }
31122
31123 /* This function checks for the availability of the coprocessor builtin passed
31124 in BUILTIN for the current target. Returns true if it is available and
31125 false otherwise. If a BUILTIN is passed for which this function has not
31126 been implemented, it will cause an internal compiler error (gcc_unreachable). */
31127
31128 bool
31129 arm_coproc_builtin_available (enum unspecv builtin)
31130 {
31131 /* None of these builtins are available in Thumb mode if the target only
31132 supports Thumb-1. */
31133 if (TARGET_THUMB1)
31134 return false;
31135
31136 switch (builtin)
31137 {
31138 case VUNSPEC_CDP:
31139 case VUNSPEC_LDC:
31140 case VUNSPEC_LDCL:
31141 case VUNSPEC_STC:
31142 case VUNSPEC_STCL:
31143 case VUNSPEC_MCR:
31144 case VUNSPEC_MRC:
31145 if (arm_arch4)
31146 return true;
31147 break;
31148 case VUNSPEC_CDP2:
31149 case VUNSPEC_LDC2:
31150 case VUNSPEC_LDC2L:
31151 case VUNSPEC_STC2:
31152 case VUNSPEC_STC2L:
31153 case VUNSPEC_MCR2:
31154 case VUNSPEC_MRC2:
31155 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31156 ARMv8-{A,M}. */
31157 if (arm_arch5)
31158 return true;
31159 break;
31160 case VUNSPEC_MCRR:
31161 case VUNSPEC_MRRC:
31162 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31163 ARMv8-{A,M}. */
31164 if (arm_arch6 || arm_arch5te)
31165 return true;
31166 break;
31167 case VUNSPEC_MCRR2:
31168 case VUNSPEC_MRRC2:
31169 if (arm_arch6)
31170 return true;
31171 break;
31172 default:
31173 gcc_unreachable ();
31174 }
31175 return false;
31176 }
31177
31178 /* This function returns true if OP is a valid memory operand for the ldc and
31179 stc coprocessor instructions and false otherwise. */
31180
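/* For example (illustrative operands), a plain register address such as
   [r0], a register plus immediate such as [r2, #-8], and the pre/post
   increment and decrement forms are accepted below, whereas [r1, #2] is
   rejected because its offset is not a multiple of 4 and [r1, #2048]
   because its offset lies outside [-1020, 1020].  */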
31181 bool
31182 arm_coproc_ldc_stc_legitimate_address (rtx op)
31183 {
31184 HOST_WIDE_INT range;
31185 /* Has to be a memory operand. */
31186 if (!MEM_P (op))
31187 return false;
31188
31189 op = XEXP (op, 0);
31190
31191 /* We accept registers. */
31192 if (REG_P (op))
31193 return true;
31194
31195 switch (GET_CODE (op))
31196 {
31197 case PLUS:
31198 {
31199 /* Or registers with an offset. */
31200 if (!REG_P (XEXP (op, 0)))
31201 return false;
31202
31203 op = XEXP (op, 1);
31204
31205 /* The offset must be an immediate though. */
31206 if (!CONST_INT_P (op))
31207 return false;
31208
31209 range = INTVAL (op);
31210
31211 /* Within the range of [-1020,1020]. */
31212 if (!IN_RANGE (range, -1020, 1020))
31213 return false;
31214
31215 /* And a multiple of 4. */
31216 return (range % 4) == 0;
31217 }
31218 case PRE_INC:
31219 case POST_INC:
31220 case PRE_DEC:
31221 case POST_DEC:
31222 return REG_P (XEXP (op, 0));
31223 default:
31224 gcc_unreachable ();
31225 }
31226 return false;
31227 }
31228
31229 #if CHECKING_P
31230 namespace selftest {
31231
31232 /* Scan the static data tables generated by parsecpu.awk looking for
31233 potential issues with the data. We primarily check for
31234 inconsistencies in the option extensions at present (extensions
31235 that duplicate others but aren't marked as aliases). Furthermore,
31236 for correct canonicalization later options must never be a subset
31237 of an earlier option. Any extension should also only specify other
31238 feature bits and never an architecture bit. The architecture is inferred
31239 from the declaration of the extension. */
31240 static void
31241 arm_test_cpu_arch_data (void)
31242 {
31243 const arch_option *arch;
31244 const cpu_option *cpu;
31245 auto_sbitmap target_isa (isa_num_bits);
31246 auto_sbitmap isa1 (isa_num_bits);
31247 auto_sbitmap isa2 (isa_num_bits);
31248
31249 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31250 {
31251 const cpu_arch_extension *ext1, *ext2;
31252
31253 if (arch->common.extensions == NULL)
31254 continue;
31255
31256 arm_initialize_isa (target_isa, arch->common.isa_bits);
31257
31258 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31259 {
31260 if (ext1->alias)
31261 continue;
31262
31263 arm_initialize_isa (isa1, ext1->isa_bits);
31264 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31265 {
31266 if (ext2->alias || ext1->remove != ext2->remove)
31267 continue;
31268
31269 arm_initialize_isa (isa2, ext2->isa_bits);
31270 /* If the option is a subset of the parent option, it doesn't
31271 add anything and so isn't useful. */
31272 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31273
31274 /* If the extension specifies any architectural bits then
31275 disallow it. Extensions should only specify feature bits. */
31276 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31277 }
31278 }
31279 }
31280
31281 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31282 {
31283 const cpu_arch_extension *ext1, *ext2;
31284
31285 if (cpu->common.extensions == NULL)
31286 continue;
31287
31288 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31289
31290 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31291 {
31292 if (ext1->alias)
31293 continue;
31294
31295 arm_initialize_isa (isa1, ext1->isa_bits);
31296 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31297 {
31298 if (ext2->alias || ext1->remove != ext2->remove)
31299 continue;
31300
31301 arm_initialize_isa (isa2, ext2->isa_bits);
31302 /* If the option is a subset of the parent option, it doesn't
31303 add anything and so isn't useful. */
31304 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31305
31306 /* If the extension specifies any architectural bits then
31307 disallow it. Extensions should only specify feature bits. */
31308 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31309 }
31310 }
31311 }
31312 }
31313
31314 static void
31315 arm_run_selftests (void)
31316 {
31317 arm_test_cpu_arch_data ();
31318 }
31319 } /* Namespace selftest. */
31320
31321 #undef TARGET_RUN_TARGET_SELFTESTS
31322 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31323 #endif /* CHECKING_P */
31324
31325 struct gcc_target targetm = TARGET_INITIALIZER;
31326
31327 #include "gt-arm.h"