1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
291 const unsigned char *sel);
292
293 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
303
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
310 const_tree);
311 static section *arm_function_section (tree, enum node_frequency, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
313 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
314 int reloc);
315 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
316 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 \f
320 /* Table of machine attributes.  A usage sketch follows the table.  */
321 static const struct attribute_spec arm_attribute_table[] =
322 {
323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
324 affects_type_identity } */
325 /* Function calls made to this symbol must be done indirectly, because
326 it may lie outside of the 26 bit addressing range of a normal function
327 call. */
328 { "long_call", 0, 0, false, true, true, NULL, false },
329 /* Whereas these functions are always known to reside within the 26 bit
330 addressing range. */
331 { "short_call", 0, 0, false, true, true, NULL, false },
332 /* Specify the procedure call conventions for a function. */
333 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
334 false },
335 /* Interrupt Service Routines have special prologue and epilogue requirements. */
336 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
339 false },
340 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
341 false },
342 #ifdef ARM_PE
343 /* ARM/PE has three new attributes:
344 interfacearm - ?
345 dllexport - for exporting a function/variable that will live in a dll
346 dllimport - for importing a function/variable from a dll
347
348 Microsoft allows multiple declspecs in one __declspec, separating
349 them with spaces. We do NOT support this. Instead, use __declspec
350 multiple times.
351 */
352 { "dllimport", 0, 0, true, false, false, NULL, false },
353 { "dllexport", 0, 0, true, false, false, NULL, false },
354 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
355 false },
356 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
360 false },
361 #endif
362 /* ARMv8-M Security Extensions support. */
363 { "cmse_nonsecure_entry", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_entry, false },
365 { "cmse_nonsecure_call", 0, 0, true, false, false,
366 arm_handle_cmse_nonsecure_call, true },
367 { NULL, 0, 0, false, false, false, NULL, false }
368 };
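/* Usage sketch for the attributes declared in the table above
   (illustrative only; the function names are invented for the example,
   and cmse_nonsecure_entry additionally requires compiling with -mcmse):

     extern void far_away (void) __attribute__ ((long_call));
     extern void nearby (void)   __attribute__ ((short_call));
     extern double f2d (float)   __attribute__ ((pcs ("aapcs")));
     extern void uart_isr (void) __attribute__ ((isr ("IRQ")));
     extern void stub (void)     __attribute__ ((naked));
     extern int gateway (int)    __attribute__ ((cmse_nonsecure_entry));

   long_call makes the compiler use an indirect call sequence so the
   callee may live outside the 26-bit branch range; pcs selects the
   procedure call standard variant; isr/interrupt request the special
   ISR prologue and epilogue; naked suppresses prologue and epilogue
   generation entirely.  */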
369 \f
370 /* Initialize the GCC target structure. */
371 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 #undef TARGET_MERGE_DECL_ATTRIBUTES
373 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
374 #endif
375
376 #undef TARGET_LEGITIMIZE_ADDRESS
377 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
378
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
381
382 #undef TARGET_INSERT_ATTRIBUTES
383 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
384
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START arm_file_start
387 #undef TARGET_ASM_FILE_END
388 #define TARGET_ASM_FILE_END arm_file_end
389
390 #undef TARGET_ASM_ALIGNED_SI_OP
391 #define TARGET_ASM_ALIGNED_SI_OP NULL
392 #undef TARGET_ASM_INTEGER
393 #define TARGET_ASM_INTEGER arm_assemble_integer
394
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND arm_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
401
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
404
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
407
408 #undef TARGET_ASM_FUNCTION_EPILOGUE
409 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
410
411 #undef TARGET_CAN_INLINE_P
412 #define TARGET_CAN_INLINE_P arm_can_inline_p
413
414 #undef TARGET_RELAYOUT_FUNCTION
415 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
416
417 #undef TARGET_OPTION_OVERRIDE
418 #define TARGET_OPTION_OVERRIDE arm_option_override
419
420 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
421 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
422
423 #undef TARGET_OPTION_SAVE
424 #define TARGET_OPTION_SAVE arm_option_save
425
426 #undef TARGET_OPTION_RESTORE
427 #define TARGET_OPTION_RESTORE arm_option_restore
428
429 #undef TARGET_OPTION_PRINT
430 #define TARGET_OPTION_PRINT arm_option_print
431
432 #undef TARGET_COMP_TYPE_ATTRIBUTES
433 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
434
435 #undef TARGET_SCHED_CAN_SPECULATE_INSN
436 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
437
438 #undef TARGET_SCHED_MACRO_FUSION_P
439 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
440
441 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
442 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
443
444 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
445 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
446
447 #undef TARGET_SCHED_ADJUST_COST
448 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
449
450 #undef TARGET_SET_CURRENT_FUNCTION
451 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
452
453 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
454 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
455
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER arm_sched_reorder
458
459 #undef TARGET_REGISTER_MOVE_COST
460 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
461
462 #undef TARGET_MEMORY_MOVE_COST
463 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
464
465 #undef TARGET_ENCODE_SECTION_INFO
466 #ifdef ARM_PE
467 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
468 #else
469 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
470 #endif
471
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
474
475 #undef TARGET_ASM_INTERNAL_LABEL
476 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
477
478 #undef TARGET_FLOATN_MODE
479 #define TARGET_FLOATN_MODE arm_floatn_mode
480
481 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
482 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
483
484 #undef TARGET_FUNCTION_VALUE
485 #define TARGET_FUNCTION_VALUE arm_function_value
486
487 #undef TARGET_LIBCALL_VALUE
488 #define TARGET_LIBCALL_VALUE arm_libcall_value
489
490 #undef TARGET_FUNCTION_VALUE_REGNO_P
491 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
492
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
497
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS arm_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST arm_address_cost
502
503 #undef TARGET_SHIFT_TRUNCATION_MASK
504 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
507 #undef TARGET_ARRAY_MODE_SUPPORTED_P
508 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
509 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
510 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
511 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
512 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
513 arm_autovectorize_vector_sizes
514
515 #undef TARGET_MACHINE_DEPENDENT_REORG
516 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
517
518 #undef TARGET_INIT_BUILTINS
519 #define TARGET_INIT_BUILTINS arm_init_builtins
520 #undef TARGET_EXPAND_BUILTIN
521 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
522 #undef TARGET_BUILTIN_DECL
523 #define TARGET_BUILTIN_DECL arm_builtin_decl
524
525 #undef TARGET_INIT_LIBFUNCS
526 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
527
528 #undef TARGET_PROMOTE_FUNCTION_MODE
529 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
530 #undef TARGET_PROMOTE_PROTOTYPES
531 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
534 #undef TARGET_ARG_PARTIAL_BYTES
535 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
536 #undef TARGET_FUNCTION_ARG
537 #define TARGET_FUNCTION_ARG arm_function_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_PADDING
541 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
542 #undef TARGET_FUNCTION_ARG_BOUNDARY
543 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
544
545 #undef TARGET_SETUP_INCOMING_VARARGS
546 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
547
548 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
549 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
550
551 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
552 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
553 #undef TARGET_TRAMPOLINE_INIT
554 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
555 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
556 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
557
558 #undef TARGET_WARN_FUNC_RETURN
559 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
560
561 #undef TARGET_DEFAULT_SHORT_ENUMS
562 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
563
564 #undef TARGET_ALIGN_ANON_BITFIELD
565 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
566
567 #undef TARGET_NARROW_VOLATILE_BITFIELD
568 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
569
570 #undef TARGET_CXX_GUARD_TYPE
571 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
572
573 #undef TARGET_CXX_GUARD_MASK_BIT
574 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
575
576 #undef TARGET_CXX_GET_COOKIE_SIZE
577 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
578
579 #undef TARGET_CXX_COOKIE_HAS_SIZE
580 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
581
582 #undef TARGET_CXX_CDTOR_RETURNS_THIS
583 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
584
585 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
586 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
587
588 #undef TARGET_CXX_USE_AEABI_ATEXIT
589 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
590
591 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
592 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
593 arm_cxx_determine_class_data_visibility
594
595 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
596 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
597
598 #undef TARGET_RETURN_IN_MSB
599 #define TARGET_RETURN_IN_MSB arm_return_in_msb
600
601 #undef TARGET_RETURN_IN_MEMORY
602 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
603
604 #undef TARGET_MUST_PASS_IN_STACK
605 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
606
607 #if ARM_UNWIND_INFO
608 #undef TARGET_ASM_UNWIND_EMIT
609 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
610
611 /* EABI unwinding tables use a different format for the typeinfo tables. */
612 #undef TARGET_ASM_TTYPE
613 #define TARGET_ASM_TTYPE arm_output_ttype
614
615 #undef TARGET_ARM_EABI_UNWINDER
616 #define TARGET_ARM_EABI_UNWINDER true
617
618 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
619 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
620
621 #endif /* ARM_UNWIND_INFO */
622
623 #undef TARGET_ASM_INIT_SECTIONS
624 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
625
626 #undef TARGET_DWARF_REGISTER_SPAN
627 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
628
629 #undef TARGET_CANNOT_COPY_INSN_P
630 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
631
632 #ifdef HAVE_AS_TLS
633 #undef TARGET_HAVE_TLS
634 #define TARGET_HAVE_TLS true
635 #endif
636
637 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
638 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
639
640 #undef TARGET_LEGITIMATE_CONSTANT_P
641 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
642
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
645
646 #undef TARGET_MAX_ANCHOR_OFFSET
647 #define TARGET_MAX_ANCHOR_OFFSET 4095
648
649 /* The minimum is set such that the total size of the block
650 for a particular anchor is -4088 + 1 + 4095 bytes, which is
651 divisible by eight, ensuring natural spacing of anchors. */
652 #undef TARGET_MIN_ANCHOR_OFFSET
653 #define TARGET_MIN_ANCHOR_OFFSET -4088
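/* Worked arithmetic for the two anchor limits above: the addressable
   block spans offsets -4088 through +4095 inclusive, i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the
   divisibility by eight mentioned in the comment.  */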
654
655 #undef TARGET_SCHED_ISSUE_RATE
656 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
657
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
660 arm_first_cycle_multipass_dfa_lookahead
661
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
664 arm_first_cycle_multipass_dfa_lookahead_guard
665
666 #undef TARGET_MANGLE_TYPE
667 #define TARGET_MANGLE_TYPE arm_mangle_type
668
669 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
670 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
671
672 #undef TARGET_BUILD_BUILTIN_VA_LIST
673 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
674 #undef TARGET_EXPAND_BUILTIN_VA_START
675 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
676 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
677 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
678
679 #ifdef HAVE_AS_TLS
680 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
681 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
682 #endif
683
684 #undef TARGET_LEGITIMATE_ADDRESS_P
685 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
686
687 #undef TARGET_PREFERRED_RELOAD_CLASS
688 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
689
690 #undef TARGET_PROMOTED_TYPE
691 #define TARGET_PROMOTED_TYPE arm_promoted_type
692
693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
694 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
695
696 #undef TARGET_COMPUTE_FRAME_LAYOUT
697 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
698
699 #undef TARGET_FRAME_POINTER_REQUIRED
700 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
701
702 #undef TARGET_CAN_ELIMINATE
703 #define TARGET_CAN_ELIMINATE arm_can_eliminate
704
705 #undef TARGET_CONDITIONAL_REGISTER_USAGE
706 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
707
708 #undef TARGET_CLASS_LIKELY_SPILLED_P
709 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
710
711 #undef TARGET_VECTORIZE_BUILTINS
712 #define TARGET_VECTORIZE_BUILTINS
713
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
716 arm_builtin_vectorized_function
717
718 #undef TARGET_VECTOR_ALIGNMENT
719 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
720
721 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
722 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
723 arm_vector_alignment_reachable
724
725 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
726 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
727 arm_builtin_support_vector_misalignment
728
729 #undef TARGET_PREFERRED_RENAME_CLASS
730 #define TARGET_PREFERRED_RENAME_CLASS \
731 arm_preferred_rename_class
732
733 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
734 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
735 arm_vectorize_vec_perm_const_ok
736
737 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
738 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
739 arm_builtin_vectorization_cost
740 #undef TARGET_VECTORIZE_ADD_STMT_COST
741 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
742
743 #undef TARGET_CANONICALIZE_COMPARISON
744 #define TARGET_CANONICALIZE_COMPARISON \
745 arm_canonicalize_comparison
746
747 #undef TARGET_ASAN_SHADOW_OFFSET
748 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
749
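/* MAX_INSN_PER_IT_BLOCK (defined below) bounds how many conditional
   instructions are grouped under a single Thumb-2 IT instruction.  The
   two values reflect the architecture: up to four instructions are
   permitted, but ARMv8 deprecates IT blocks containing more than one,
   which is what the arm_restrict_it flag (-mrestrict-it) selects.  */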
750 #undef MAX_INSN_PER_IT_BLOCK
751 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
752
753 #undef TARGET_CAN_USE_DOLOOP_P
754 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
755
756 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
757 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
758
759 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
760 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
761
762 #undef TARGET_SCHED_FUSION_PRIORITY
763 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
764
765 #undef TARGET_ASM_FUNCTION_SECTION
766 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
767
768 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
769 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
770
771 #undef TARGET_SECTION_TYPE_FLAGS
772 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
773
774 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
775 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
776
777 #undef TARGET_C_EXCESS_PRECISION
778 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
779
780 /* Although the architecture reserves bits 0 and 1, only the former is
781 used for ARM/Thumb ISA selection in v7 and earlier versions. */
782 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
783 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
784
785 #undef TARGET_FIXED_CONDITION_CODE_REGS
786 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
787
788 #undef TARGET_HARD_REGNO_MODE_OK
789 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
790
791 #undef TARGET_MODES_TIEABLE_P
792 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
793 \f
794 /* Obstack for minipool constant handling. */
795 static struct obstack minipool_obstack;
796 static char * minipool_startobj;
797
798 /* The maximum number of insns skipped which
799 will be conditionalised if possible. */
800 static int max_insns_skipped = 5;
801
802 extern FILE * asm_out_file;
803
804 /* True if we are currently building a constant table. */
805 int making_const_table;
806
807 /* The processor for which instructions should be scheduled. */
808 enum processor_type arm_tune = TARGET_CPU_arm_none;
809
810 /* The current tuning set. */
811 const struct tune_params *current_tune;
812
813 /* Which floating point hardware to schedule for. */
814 int arm_fpu_attr;
815
816 /* Used for Thumb call_via trampolines. */
817 rtx thumb_call_via_label[14];
818 static int thumb_call_reg_needed;
819
820 /* The bits in this mask specify which instruction scheduling options should
821 be used. */
822 unsigned int tune_flags = 0;
823
824 /* The highest ARM architecture version supported by the
825 target. */
826 enum base_architecture arm_base_arch = BASE_ARCH_0;
827
828 /* Active target architecture and tuning. */
829
830 struct arm_build_target arm_active_target;
831
832 /* The following are used in the arm.md file as equivalents to bits
833 in the above two flag variables. */
834
835 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
836 int arm_arch3m = 0;
837
838 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
839 int arm_arch4 = 0;
840
841 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
842 int arm_arch4t = 0;
843
844 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
845 int arm_arch5 = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
848 int arm_arch5e = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
851 int arm_arch5te = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
854 int arm_arch6 = 0;
855
856 /* Nonzero if this chip supports the ARM 6K extensions. */
857 int arm_arch6k = 0;
858
859 /* Nonzero if this chip supports the ARM 6KZ extensions. */
860 int arm_arch6kz = 0;
861
862 /* Nonzero if instructions present in ARMv6-M can be used. */
863 int arm_arch6m = 0;
864
865 /* Nonzero if this chip supports the ARM 7 extensions. */
866 int arm_arch7 = 0;
867
868 /* Nonzero if this chip supports the Large Physical Address Extension. */
869 int arm_arch_lpae = 0;
870
871 /* Nonzero if instructions not present in the 'M' profile can be used. */
872 int arm_arch_notm = 0;
873
874 /* Nonzero if instructions present in ARMv7E-M can be used. */
875 int arm_arch7em = 0;
876
877 /* Nonzero if instructions present in ARMv8 can be used. */
878 int arm_arch8 = 0;
879
880 /* Nonzero if this chip supports the ARMv8.1 extensions. */
881 int arm_arch8_1 = 0;
882
883 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
884 int arm_arch8_2 = 0;
885
886 /* Nonzero if this chip supports the FP16 instructions extension of ARM
887 Architecture 8.2. */
888 int arm_fp16_inst = 0;
889
890 /* Nonzero if this chip can benefit from load scheduling. */
891 int arm_ld_sched = 0;
892
893 /* Nonzero if this chip is a StrongARM. */
894 int arm_tune_strongarm = 0;
895
896 /* Nonzero if this chip supports Intel Wireless MMX technology. */
897 int arm_arch_iwmmxt = 0;
898
899 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
900 int arm_arch_iwmmxt2 = 0;
901
902 /* Nonzero if this chip is an XScale. */
903 int arm_arch_xscale = 0;
904
905 /* Nonzero if tuning for XScale */
906 int arm_tune_xscale = 0;
907
908 /* Nonzero if we want to tune for stores that access the write-buffer.
909 This typically means an ARM6 or ARM7 with MMU or MPU. */
910 int arm_tune_wbuf = 0;
911
912 /* Nonzero if tuning for Cortex-A9. */
913 int arm_tune_cortex_a9 = 0;
914
915 /* Nonzero if we should define __THUMB_INTERWORK__ in the
916 preprocessor.
917 XXX This is a bit of a hack; it's intended to help work around
918 problems in GLD, which doesn't understand that armv5t code is
919 interworking clean. */
920 int arm_cpp_interwork = 0;
921
922 /* Nonzero if chip supports Thumb 1. */
923 int arm_arch_thumb1;
924
925 /* Nonzero if chip supports Thumb 2. */
926 int arm_arch_thumb2;
927
928 /* Nonzero if chip supports integer division instruction. */
929 int arm_arch_arm_hwdiv;
930 int arm_arch_thumb_hwdiv;
931
932 /* Nonzero if chip disallows volatile memory access in IT block. */
933 int arm_arch_no_volatile_ce;
934
935 /* Nonzero if we should use Neon to handle 64-bit operations rather
936 than core registers. */
937 int prefer_neon_for_64bits = 0;
938
939 /* Nonzero if we shouldn't use literal pools. */
940 bool arm_disable_literal_pool = false;
941
942 /* The register number to be used for the PIC offset register. */
943 unsigned arm_pic_register = INVALID_REGNUM;
944
945 enum arm_pcs arm_pcs_default;
946
947 /* For an explanation of these variables, see final_prescan_insn below. */
948 int arm_ccfsm_state;
949 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
950 enum arm_cond_code arm_current_cc;
951
952 rtx arm_target_insn;
953 int arm_target_label;
954 /* The number of conditionally executed insns, including the current insn. */
955 int arm_condexec_count = 0;
956 /* A bitmask specifying the patterns for the IT block.
957 Zero means do not output an IT block before this insn. */
958 int arm_condexec_mask = 0;
959 /* The number of bits used in arm_condexec_mask. */
960 int arm_condexec_masklen = 0;
961
962 /* Nonzero if chip supports the ARMv8 CRC instructions. */
963 int arm_arch_crc = 0;
964
965 /* Nonzero if chip supports the ARMv8-M security extensions. */
966 int arm_arch_cmse = 0;
967
968 /* Nonzero if the core has a very small, high-latency multiply unit. */
969 int arm_m_profile_small_mul = 0;
970
971 /* The condition codes of the ARM, and the inverse function. */
972 static const char * const arm_condition_codes[] =
973 {
974 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
975 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
976 };
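/* Illustrative note on the ordering above: each condition sits next to
   its logical inverse, so inverting a condition only toggles bit 0 --
   "eq" (0) <-> "ne" (1), "cs" (2) <-> "cc" (3), and so on.  Assuming
   the ARM_INVERSE_CONDITION_CODE macro from arm.h, the mapping is
   simply:

     #define ARM_INVERSE_CONDITION_CODE(X)  ((arm_cc) (((int) X) ^ 1))
*/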
977
978 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
979 int arm_regs_in_sequence[] =
980 {
981 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
982 };
983
984 #define ARM_LSL_NAME "lsl"
985 #define streq(string1, string2) (strcmp (string1, string2) == 0)
986
987 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
988 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
989 | (1 << PIC_OFFSET_TABLE_REGNUM)))
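/* Worked example for the THUMB2_WORK_REGS mask above, under the
   assumption that, as on typical ARM targets,
   THUMB_HARD_FRAME_POINTER_REGNUM is r7 while SP (r13), PC (r15) and
   any PIC register all lie above r7: the expression then evaluates to
   0xff & ~(1 << 7) == 0x7f, i.e. the low registers r0-r6 are available
   as Thumb-2 work registers.  */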
990 \f
991 /* Initialization code. */
992
993 struct cpu_tune
994 {
995 enum processor_type scheduler;
996 unsigned int tune_flags;
997 const struct tune_params *tune;
998 };
999
1000 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1001 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1002 { \
1003 num_slots, \
1004 l1_size, \
1005 l1_line_size \
1006 }
1007
1008 /* arm generic vectorizer costs. */
1009 static const
1010 struct cpu_vec_costs arm_default_vec_cost = {
1011 1, /* scalar_stmt_cost. */
1012 1, /* scalar_load_cost. */
1013 1, /* scalar_store_cost. */
1014 1, /* vec_stmt_cost. */
1015 1, /* vec_to_scalar_cost. */
1016 1, /* scalar_to_vec_cost. */
1017 1, /* vec_align_load_cost. */
1018 1, /* vec_unalign_load_cost. */
1019 1, /* vec_unalign_store_cost. */
1020 1, /* vec_store_cost. */
1021 3, /* cond_taken_branch_cost. */
1022 1, /* cond_not_taken_branch_cost. */
1023 };
1024
1025 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1026 #include "aarch-cost-tables.h"
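/* The cost tables below are expressed in the units produced by
   COSTS_N_INSNS; assuming the usual definition from rtl.h,
   COSTS_N_INSNS (N) is (N) * 4, so e.g. COSTS_N_INSNS (3) == 12, while
   a plain 0 entry means "no extra cost beyond a baseline
   instruction".  */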
1027
1028
1029
1030 const struct cpu_cost_table cortexa9_extra_costs =
1031 {
1032 /* ALU */
1033 {
1034 0, /* arith. */
1035 0, /* logical. */
1036 0, /* shift. */
1037 COSTS_N_INSNS (1), /* shift_reg. */
1038 COSTS_N_INSNS (1), /* arith_shift. */
1039 COSTS_N_INSNS (2), /* arith_shift_reg. */
1040 0, /* log_shift. */
1041 COSTS_N_INSNS (1), /* log_shift_reg. */
1042 COSTS_N_INSNS (1), /* extend. */
1043 COSTS_N_INSNS (2), /* extend_arith. */
1044 COSTS_N_INSNS (1), /* bfi. */
1045 COSTS_N_INSNS (1), /* bfx. */
1046 0, /* clz. */
1047 0, /* rev. */
1048 0, /* non_exec. */
1049 true /* non_exec_costs_exec. */
1050 },
1051 {
1052 /* MULT SImode */
1053 {
1054 COSTS_N_INSNS (3), /* simple. */
1055 COSTS_N_INSNS (3), /* flag_setting. */
1056 COSTS_N_INSNS (2), /* extend. */
1057 COSTS_N_INSNS (3), /* add. */
1058 COSTS_N_INSNS (2), /* extend_add. */
1059 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1060 },
1061 /* MULT DImode */
1062 {
1063 0, /* simple (N/A). */
1064 0, /* flag_setting (N/A). */
1065 COSTS_N_INSNS (4), /* extend. */
1066 0, /* add (N/A). */
1067 COSTS_N_INSNS (4), /* extend_add. */
1068 0 /* idiv (N/A). */
1069 }
1070 },
1071 /* LD/ST */
1072 {
1073 COSTS_N_INSNS (2), /* load. */
1074 COSTS_N_INSNS (2), /* load_sign_extend. */
1075 COSTS_N_INSNS (2), /* ldrd. */
1076 COSTS_N_INSNS (2), /* ldm_1st. */
1077 1, /* ldm_regs_per_insn_1st. */
1078 2, /* ldm_regs_per_insn_subsequent. */
1079 COSTS_N_INSNS (5), /* loadf. */
1080 COSTS_N_INSNS (5), /* loadd. */
1081 COSTS_N_INSNS (1), /* load_unaligned. */
1082 COSTS_N_INSNS (2), /* store. */
1083 COSTS_N_INSNS (2), /* strd. */
1084 COSTS_N_INSNS (2), /* stm_1st. */
1085 1, /* stm_regs_per_insn_1st. */
1086 2, /* stm_regs_per_insn_subsequent. */
1087 COSTS_N_INSNS (1), /* storef. */
1088 COSTS_N_INSNS (1), /* stored. */
1089 COSTS_N_INSNS (1), /* store_unaligned. */
1090 COSTS_N_INSNS (1), /* loadv. */
1091 COSTS_N_INSNS (1) /* storev. */
1092 },
1093 {
1094 /* FP SFmode */
1095 {
1096 COSTS_N_INSNS (14), /* div. */
1097 COSTS_N_INSNS (4), /* mult. */
1098 COSTS_N_INSNS (7), /* mult_addsub. */
1099 COSTS_N_INSNS (30), /* fma. */
1100 COSTS_N_INSNS (3), /* addsub. */
1101 COSTS_N_INSNS (1), /* fpconst. */
1102 COSTS_N_INSNS (1), /* neg. */
1103 COSTS_N_INSNS (3), /* compare. */
1104 COSTS_N_INSNS (3), /* widen. */
1105 COSTS_N_INSNS (3), /* narrow. */
1106 COSTS_N_INSNS (3), /* toint. */
1107 COSTS_N_INSNS (3), /* fromint. */
1108 COSTS_N_INSNS (3) /* roundint. */
1109 },
1110 /* FP DFmode */
1111 {
1112 COSTS_N_INSNS (24), /* div. */
1113 COSTS_N_INSNS (5), /* mult. */
1114 COSTS_N_INSNS (8), /* mult_addsub. */
1115 COSTS_N_INSNS (30), /* fma. */
1116 COSTS_N_INSNS (3), /* addsub. */
1117 COSTS_N_INSNS (1), /* fpconst. */
1118 COSTS_N_INSNS (1), /* neg. */
1119 COSTS_N_INSNS (3), /* compare. */
1120 COSTS_N_INSNS (3), /* widen. */
1121 COSTS_N_INSNS (3), /* narrow. */
1122 COSTS_N_INSNS (3), /* toint. */
1123 COSTS_N_INSNS (3), /* fromint. */
1124 COSTS_N_INSNS (3) /* roundint. */
1125 }
1126 },
1127 /* Vector */
1128 {
1129 COSTS_N_INSNS (1) /* alu. */
1130 }
1131 };
1132
1133 const struct cpu_cost_table cortexa8_extra_costs =
1134 {
1135 /* ALU */
1136 {
1137 0, /* arith. */
1138 0, /* logical. */
1139 COSTS_N_INSNS (1), /* shift. */
1140 0, /* shift_reg. */
1141 COSTS_N_INSNS (1), /* arith_shift. */
1142 0, /* arith_shift_reg. */
1143 COSTS_N_INSNS (1), /* log_shift. */
1144 0, /* log_shift_reg. */
1145 0, /* extend. */
1146 0, /* extend_arith. */
1147 0, /* bfi. */
1148 0, /* bfx. */
1149 0, /* clz. */
1150 0, /* rev. */
1151 0, /* non_exec. */
1152 true /* non_exec_costs_exec. */
1153 },
1154 {
1155 /* MULT SImode */
1156 {
1157 COSTS_N_INSNS (1), /* simple. */
1158 COSTS_N_INSNS (1), /* flag_setting. */
1159 COSTS_N_INSNS (1), /* extend. */
1160 COSTS_N_INSNS (1), /* add. */
1161 COSTS_N_INSNS (1), /* extend_add. */
1162 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1163 },
1164 /* MULT DImode */
1165 {
1166 0, /* simple (N/A). */
1167 0, /* flag_setting (N/A). */
1168 COSTS_N_INSNS (2), /* extend. */
1169 0, /* add (N/A). */
1170 COSTS_N_INSNS (2), /* extend_add. */
1171 0 /* idiv (N/A). */
1172 }
1173 },
1174 /* LD/ST */
1175 {
1176 COSTS_N_INSNS (1), /* load. */
1177 COSTS_N_INSNS (1), /* load_sign_extend. */
1178 COSTS_N_INSNS (1), /* ldrd. */
1179 COSTS_N_INSNS (1), /* ldm_1st. */
1180 1, /* ldm_regs_per_insn_1st. */
1181 2, /* ldm_regs_per_insn_subsequent. */
1182 COSTS_N_INSNS (1), /* loadf. */
1183 COSTS_N_INSNS (1), /* loadd. */
1184 COSTS_N_INSNS (1), /* load_unaligned. */
1185 COSTS_N_INSNS (1), /* store. */
1186 COSTS_N_INSNS (1), /* strd. */
1187 COSTS_N_INSNS (1), /* stm_1st. */
1188 1, /* stm_regs_per_insn_1st. */
1189 2, /* stm_regs_per_insn_subsequent. */
1190 COSTS_N_INSNS (1), /* storef. */
1191 COSTS_N_INSNS (1), /* stored. */
1192 COSTS_N_INSNS (1), /* store_unaligned. */
1193 COSTS_N_INSNS (1), /* loadv. */
1194 COSTS_N_INSNS (1) /* storev. */
1195 },
1196 {
1197 /* FP SFmode */
1198 {
1199 COSTS_N_INSNS (36), /* div. */
1200 COSTS_N_INSNS (11), /* mult. */
1201 COSTS_N_INSNS (20), /* mult_addsub. */
1202 COSTS_N_INSNS (30), /* fma. */
1203 COSTS_N_INSNS (9), /* addsub. */
1204 COSTS_N_INSNS (3), /* fpconst. */
1205 COSTS_N_INSNS (3), /* neg. */
1206 COSTS_N_INSNS (6), /* compare. */
1207 COSTS_N_INSNS (4), /* widen. */
1208 COSTS_N_INSNS (4), /* narrow. */
1209 COSTS_N_INSNS (8), /* toint. */
1210 COSTS_N_INSNS (8), /* fromint. */
1211 COSTS_N_INSNS (8) /* roundint. */
1212 },
1213 /* FP DFmode */
1214 {
1215 COSTS_N_INSNS (64), /* div. */
1216 COSTS_N_INSNS (16), /* mult. */
1217 COSTS_N_INSNS (25), /* mult_addsub. */
1218 COSTS_N_INSNS (30), /* fma. */
1219 COSTS_N_INSNS (9), /* addsub. */
1220 COSTS_N_INSNS (3), /* fpconst. */
1221 COSTS_N_INSNS (3), /* neg. */
1222 COSTS_N_INSNS (6), /* compare. */
1223 COSTS_N_INSNS (6), /* widen. */
1224 COSTS_N_INSNS (6), /* narrow. */
1225 COSTS_N_INSNS (8), /* toint. */
1226 COSTS_N_INSNS (8), /* fromint. */
1227 COSTS_N_INSNS (8) /* roundint. */
1228 }
1229 },
1230 /* Vector */
1231 {
1232 COSTS_N_INSNS (1) /* alu. */
1233 }
1234 };
1235
1236 const struct cpu_cost_table cortexa5_extra_costs =
1237 {
1238 /* ALU */
1239 {
1240 0, /* arith. */
1241 0, /* logical. */
1242 COSTS_N_INSNS (1), /* shift. */
1243 COSTS_N_INSNS (1), /* shift_reg. */
1244 COSTS_N_INSNS (1), /* arith_shift. */
1245 COSTS_N_INSNS (1), /* arith_shift_reg. */
1246 COSTS_N_INSNS (1), /* log_shift. */
1247 COSTS_N_INSNS (1), /* log_shift_reg. */
1248 COSTS_N_INSNS (1), /* extend. */
1249 COSTS_N_INSNS (1), /* extend_arith. */
1250 COSTS_N_INSNS (1), /* bfi. */
1251 COSTS_N_INSNS (1), /* bfx. */
1252 COSTS_N_INSNS (1), /* clz. */
1253 COSTS_N_INSNS (1), /* rev. */
1254 0, /* non_exec. */
1255 true /* non_exec_costs_exec. */
1256 },
1257
1258 {
1259 /* MULT SImode */
1260 {
1261 0, /* simple. */
1262 COSTS_N_INSNS (1), /* flag_setting. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* add. */
1265 COSTS_N_INSNS (1), /* extend_add. */
1266 COSTS_N_INSNS (7) /* idiv. */
1267 },
1268 /* MULT DImode */
1269 {
1270 0, /* simple (N/A). */
1271 0, /* flag_setting (N/A). */
1272 COSTS_N_INSNS (1), /* extend. */
1273 0, /* add. */
1274 COSTS_N_INSNS (2), /* extend_add. */
1275 0 /* idiv (N/A). */
1276 }
1277 },
1278 /* LD/ST */
1279 {
1280 COSTS_N_INSNS (1), /* load. */
1281 COSTS_N_INSNS (1), /* load_sign_extend. */
1282 COSTS_N_INSNS (6), /* ldrd. */
1283 COSTS_N_INSNS (1), /* ldm_1st. */
1284 1, /* ldm_regs_per_insn_1st. */
1285 2, /* ldm_regs_per_insn_subsequent. */
1286 COSTS_N_INSNS (2), /* loadf. */
1287 COSTS_N_INSNS (4), /* loadd. */
1288 COSTS_N_INSNS (1), /* load_unaligned. */
1289 COSTS_N_INSNS (1), /* store. */
1290 COSTS_N_INSNS (3), /* strd. */
1291 COSTS_N_INSNS (1), /* stm_1st. */
1292 1, /* stm_regs_per_insn_1st. */
1293 2, /* stm_regs_per_insn_subsequent. */
1294 COSTS_N_INSNS (2), /* storef. */
1295 COSTS_N_INSNS (2), /* stored. */
1296 COSTS_N_INSNS (1), /* store_unaligned. */
1297 COSTS_N_INSNS (1), /* loadv. */
1298 COSTS_N_INSNS (1) /* storev. */
1299 },
1300 {
1301 /* FP SFmode */
1302 {
1303 COSTS_N_INSNS (15), /* div. */
1304 COSTS_N_INSNS (3), /* mult. */
1305 COSTS_N_INSNS (7), /* mult_addsub. */
1306 COSTS_N_INSNS (7), /* fma. */
1307 COSTS_N_INSNS (3), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (3), /* compare. */
1311 COSTS_N_INSNS (3), /* widen. */
1312 COSTS_N_INSNS (3), /* narrow. */
1313 COSTS_N_INSNS (3), /* toint. */
1314 COSTS_N_INSNS (3), /* fromint. */
1315 COSTS_N_INSNS (3) /* roundint. */
1316 },
1317 /* FP DFmode */
1318 {
1319 COSTS_N_INSNS (30), /* div. */
1320 COSTS_N_INSNS (6), /* mult. */
1321 COSTS_N_INSNS (10), /* mult_addsub. */
1322 COSTS_N_INSNS (7), /* fma. */
1323 COSTS_N_INSNS (3), /* addsub. */
1324 COSTS_N_INSNS (3), /* fpconst. */
1325 COSTS_N_INSNS (3), /* neg. */
1326 COSTS_N_INSNS (3), /* compare. */
1327 COSTS_N_INSNS (3), /* widen. */
1328 COSTS_N_INSNS (3), /* narrow. */
1329 COSTS_N_INSNS (3), /* toint. */
1330 COSTS_N_INSNS (3), /* fromint. */
1331 COSTS_N_INSNS (3) /* roundint. */
1332 }
1333 },
1334 /* Vector */
1335 {
1336 COSTS_N_INSNS (1) /* alu. */
1337 }
1338 };
1339
1340
1341 const struct cpu_cost_table cortexa7_extra_costs =
1342 {
1343 /* ALU */
1344 {
1345 0, /* arith. */
1346 0, /* logical. */
1347 COSTS_N_INSNS (1), /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 COSTS_N_INSNS (1), /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 COSTS_N_INSNS (1), /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1361 },
1362
1363 {
1364 /* MULT SImode */
1365 {
1366 0, /* simple. */
1367 COSTS_N_INSNS (1), /* flag_setting. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* add. */
1370 COSTS_N_INSNS (1), /* extend_add. */
1371 COSTS_N_INSNS (7) /* idiv. */
1372 },
1373 /* MULT DImode */
1374 {
1375 0, /* simple (N/A). */
1376 0, /* flag_setting (N/A). */
1377 COSTS_N_INSNS (1), /* extend. */
1378 0, /* add. */
1379 COSTS_N_INSNS (2), /* extend_add. */
1380 0 /* idiv (N/A). */
1381 }
1382 },
1383 /* LD/ST */
1384 {
1385 COSTS_N_INSNS (1), /* load. */
1386 COSTS_N_INSNS (1), /* load_sign_extend. */
1387 COSTS_N_INSNS (3), /* ldrd. */
1388 COSTS_N_INSNS (1), /* ldm_1st. */
1389 1, /* ldm_regs_per_insn_1st. */
1390 2, /* ldm_regs_per_insn_subsequent. */
1391 COSTS_N_INSNS (2), /* loadf. */
1392 COSTS_N_INSNS (2), /* loadd. */
1393 COSTS_N_INSNS (1), /* load_unaligned. */
1394 COSTS_N_INSNS (1), /* store. */
1395 COSTS_N_INSNS (3), /* strd. */
1396 COSTS_N_INSNS (1), /* stm_1st. */
1397 1, /* stm_regs_per_insn_1st. */
1398 2, /* stm_regs_per_insn_subsequent. */
1399 COSTS_N_INSNS (2), /* storef. */
1400 COSTS_N_INSNS (2), /* stored. */
1401 COSTS_N_INSNS (1), /* store_unaligned. */
1402 COSTS_N_INSNS (1), /* loadv. */
1403 COSTS_N_INSNS (1) /* storev. */
1404 },
1405 {
1406 /* FP SFmode */
1407 {
1408 COSTS_N_INSNS (15), /* div. */
1409 COSTS_N_INSNS (3), /* mult. */
1410 COSTS_N_INSNS (7), /* mult_addsub. */
1411 COSTS_N_INSNS (7), /* fma. */
1412 COSTS_N_INSNS (3), /* addsub. */
1413 COSTS_N_INSNS (3), /* fpconst. */
1414 COSTS_N_INSNS (3), /* neg. */
1415 COSTS_N_INSNS (3), /* compare. */
1416 COSTS_N_INSNS (3), /* widen. */
1417 COSTS_N_INSNS (3), /* narrow. */
1418 COSTS_N_INSNS (3), /* toint. */
1419 COSTS_N_INSNS (3), /* fromint. */
1420 COSTS_N_INSNS (3) /* roundint. */
1421 },
1422 /* FP DFmode */
1423 {
1424 COSTS_N_INSNS (30), /* div. */
1425 COSTS_N_INSNS (6), /* mult. */
1426 COSTS_N_INSNS (10), /* mult_addsub. */
1427 COSTS_N_INSNS (7), /* fma. */
1428 COSTS_N_INSNS (3), /* addsub. */
1429 COSTS_N_INSNS (3), /* fpconst. */
1430 COSTS_N_INSNS (3), /* neg. */
1431 COSTS_N_INSNS (3), /* compare. */
1432 COSTS_N_INSNS (3), /* widen. */
1433 COSTS_N_INSNS (3), /* narrow. */
1434 COSTS_N_INSNS (3), /* toint. */
1435 COSTS_N_INSNS (3), /* fromint. */
1436 COSTS_N_INSNS (3) /* roundint. */
1437 }
1438 },
1439 /* Vector */
1440 {
1441 COSTS_N_INSNS (1) /* alu. */
1442 }
1443 };
1444
1445 const struct cpu_cost_table cortexa12_extra_costs =
1446 {
1447 /* ALU */
1448 {
1449 0, /* arith. */
1450 0, /* logical. */
1451 0, /* shift. */
1452 COSTS_N_INSNS (1), /* shift_reg. */
1453 COSTS_N_INSNS (1), /* arith_shift. */
1454 COSTS_N_INSNS (1), /* arith_shift_reg. */
1455 COSTS_N_INSNS (1), /* log_shift. */
1456 COSTS_N_INSNS (1), /* log_shift_reg. */
1457 0, /* extend. */
1458 COSTS_N_INSNS (1), /* extend_arith. */
1459 0, /* bfi. */
1460 COSTS_N_INSNS (1), /* bfx. */
1461 COSTS_N_INSNS (1), /* clz. */
1462 COSTS_N_INSNS (1), /* rev. */
1463 0, /* non_exec. */
1464 true /* non_exec_costs_exec. */
1465 },
1466 /* MULT SImode */
1467 {
1468 {
1469 COSTS_N_INSNS (2), /* simple. */
1470 COSTS_N_INSNS (3), /* flag_setting. */
1471 COSTS_N_INSNS (2), /* extend. */
1472 COSTS_N_INSNS (3), /* add. */
1473 COSTS_N_INSNS (2), /* extend_add. */
1474 COSTS_N_INSNS (18) /* idiv. */
1475 },
1476 /* MULT DImode */
1477 {
1478 0, /* simple (N/A). */
1479 0, /* flag_setting (N/A). */
1480 COSTS_N_INSNS (3), /* extend. */
1481 0, /* add (N/A). */
1482 COSTS_N_INSNS (3), /* extend_add. */
1483 0 /* idiv (N/A). */
1484 }
1485 },
1486 /* LD/ST */
1487 {
1488 COSTS_N_INSNS (3), /* load. */
1489 COSTS_N_INSNS (3), /* load_sign_extend. */
1490 COSTS_N_INSNS (3), /* ldrd. */
1491 COSTS_N_INSNS (3), /* ldm_1st. */
1492 1, /* ldm_regs_per_insn_1st. */
1493 2, /* ldm_regs_per_insn_subsequent. */
1494 COSTS_N_INSNS (3), /* loadf. */
1495 COSTS_N_INSNS (3), /* loadd. */
1496 0, /* load_unaligned. */
1497 0, /* store. */
1498 0, /* strd. */
1499 0, /* stm_1st. */
1500 1, /* stm_regs_per_insn_1st. */
1501 2, /* stm_regs_per_insn_subsequent. */
1502 COSTS_N_INSNS (2), /* storef. */
1503 COSTS_N_INSNS (2), /* stored. */
1504 0, /* store_unaligned. */
1505 COSTS_N_INSNS (1), /* loadv. */
1506 COSTS_N_INSNS (1) /* storev. */
1507 },
1508 {
1509 /* FP SFmode */
1510 {
1511 COSTS_N_INSNS (17), /* div. */
1512 COSTS_N_INSNS (4), /* mult. */
1513 COSTS_N_INSNS (8), /* mult_addsub. */
1514 COSTS_N_INSNS (8), /* fma. */
1515 COSTS_N_INSNS (4), /* addsub. */
1516 COSTS_N_INSNS (2), /* fpconst. */
1517 COSTS_N_INSNS (2), /* neg. */
1518 COSTS_N_INSNS (2), /* compare. */
1519 COSTS_N_INSNS (4), /* widen. */
1520 COSTS_N_INSNS (4), /* narrow. */
1521 COSTS_N_INSNS (4), /* toint. */
1522 COSTS_N_INSNS (4), /* fromint. */
1523 COSTS_N_INSNS (4) /* roundint. */
1524 },
1525 /* FP DFmode */
1526 {
1527 COSTS_N_INSNS (31), /* div. */
1528 COSTS_N_INSNS (4), /* mult. */
1529 COSTS_N_INSNS (8), /* mult_addsub. */
1530 COSTS_N_INSNS (8), /* fma. */
1531 COSTS_N_INSNS (4), /* addsub. */
1532 COSTS_N_INSNS (2), /* fpconst. */
1533 COSTS_N_INSNS (2), /* neg. */
1534 COSTS_N_INSNS (2), /* compare. */
1535 COSTS_N_INSNS (4), /* widen. */
1536 COSTS_N_INSNS (4), /* narrow. */
1537 COSTS_N_INSNS (4), /* toint. */
1538 COSTS_N_INSNS (4), /* fromint. */
1539 COSTS_N_INSNS (4) /* roundint. */
1540 }
1541 },
1542 /* Vector */
1543 {
1544 COSTS_N_INSNS (1) /* alu. */
1545 }
1546 };
1547
1548 const struct cpu_cost_table cortexa15_extra_costs =
1549 {
1550 /* ALU */
1551 {
1552 0, /* arith. */
1553 0, /* logical. */
1554 0, /* shift. */
1555 0, /* shift_reg. */
1556 COSTS_N_INSNS (1), /* arith_shift. */
1557 COSTS_N_INSNS (1), /* arith_shift_reg. */
1558 COSTS_N_INSNS (1), /* log_shift. */
1559 COSTS_N_INSNS (1), /* log_shift_reg. */
1560 0, /* extend. */
1561 COSTS_N_INSNS (1), /* extend_arith. */
1562 COSTS_N_INSNS (1), /* bfi. */
1563 0, /* bfx. */
1564 0, /* clz. */
1565 0, /* rev. */
1566 0, /* non_exec. */
1567 true /* non_exec_costs_exec. */
1568 },
1569 /* MULT SImode */
1570 {
1571 {
1572 COSTS_N_INSNS (2), /* simple. */
1573 COSTS_N_INSNS (3), /* flag_setting. */
1574 COSTS_N_INSNS (2), /* extend. */
1575 COSTS_N_INSNS (2), /* add. */
1576 COSTS_N_INSNS (2), /* extend_add. */
1577 COSTS_N_INSNS (18) /* idiv. */
1578 },
1579 /* MULT DImode */
1580 {
1581 0, /* simple (N/A). */
1582 0, /* flag_setting (N/A). */
1583 COSTS_N_INSNS (3), /* extend. */
1584 0, /* add (N/A). */
1585 COSTS_N_INSNS (3), /* extend_add. */
1586 0 /* idiv (N/A). */
1587 }
1588 },
1589 /* LD/ST */
1590 {
1591 COSTS_N_INSNS (3), /* load. */
1592 COSTS_N_INSNS (3), /* load_sign_extend. */
1593 COSTS_N_INSNS (3), /* ldrd. */
1594 COSTS_N_INSNS (4), /* ldm_1st. */
1595 1, /* ldm_regs_per_insn_1st. */
1596 2, /* ldm_regs_per_insn_subsequent. */
1597 COSTS_N_INSNS (4), /* loadf. */
1598 COSTS_N_INSNS (4), /* loadd. */
1599 0, /* load_unaligned. */
1600 0, /* store. */
1601 0, /* strd. */
1602 COSTS_N_INSNS (1), /* stm_1st. */
1603 1, /* stm_regs_per_insn_1st. */
1604 2, /* stm_regs_per_insn_subsequent. */
1605 0, /* storef. */
1606 0, /* stored. */
1607 0, /* store_unaligned. */
1608 COSTS_N_INSNS (1), /* loadv. */
1609 COSTS_N_INSNS (1) /* storev. */
1610 },
1611 {
1612 /* FP SFmode */
1613 {
1614 COSTS_N_INSNS (17), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (5), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 },
1628 /* FP DFmode */
1629 {
1630 COSTS_N_INSNS (31), /* div. */
1631 COSTS_N_INSNS (4), /* mult. */
1632 COSTS_N_INSNS (8), /* mult_addsub. */
1633 COSTS_N_INSNS (8), /* fma. */
1634 COSTS_N_INSNS (4), /* addsub. */
1635 COSTS_N_INSNS (2), /* fpconst. */
1636 COSTS_N_INSNS (2), /* neg. */
1637 COSTS_N_INSNS (2), /* compare. */
1638 COSTS_N_INSNS (4), /* widen. */
1639 COSTS_N_INSNS (4), /* narrow. */
1640 COSTS_N_INSNS (4), /* toint. */
1641 COSTS_N_INSNS (4), /* fromint. */
1642 COSTS_N_INSNS (4) /* roundint. */
1643 }
1644 },
1645 /* Vector */
1646 {
1647 COSTS_N_INSNS (1) /* alu. */
1648 }
1649 };
1650
1651 const struct cpu_cost_table v7m_extra_costs =
1652 {
1653 /* ALU */
1654 {
1655 0, /* arith. */
1656 0, /* logical. */
1657 0, /* shift. */
1658 0, /* shift_reg. */
1659 0, /* arith_shift. */
1660 COSTS_N_INSNS (1), /* arith_shift_reg. */
1661 0, /* log_shift. */
1662 COSTS_N_INSNS (1), /* log_shift_reg. */
1663 0, /* extend. */
1664 COSTS_N_INSNS (1), /* extend_arith. */
1665 0, /* bfi. */
1666 0, /* bfx. */
1667 0, /* clz. */
1668 0, /* rev. */
1669 COSTS_N_INSNS (1), /* non_exec. */
1670 false /* non_exec_costs_exec. */
1671 },
1672 {
1673 /* MULT SImode */
1674 {
1675 COSTS_N_INSNS (1), /* simple. */
1676 COSTS_N_INSNS (1), /* flag_setting. */
1677 COSTS_N_INSNS (2), /* extend. */
1678 COSTS_N_INSNS (1), /* add. */
1679 COSTS_N_INSNS (3), /* extend_add. */
1680 COSTS_N_INSNS (8) /* idiv. */
1681 },
1682 /* MULT DImode */
1683 {
1684 0, /* simple (N/A). */
1685 0, /* flag_setting (N/A). */
1686 COSTS_N_INSNS (2), /* extend. */
1687 0, /* add (N/A). */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 0 /* idiv (N/A). */
1690 }
1691 },
1692 /* LD/ST */
1693 {
1694 COSTS_N_INSNS (2), /* load. */
1695 0, /* load_sign_extend. */
1696 COSTS_N_INSNS (3), /* ldrd. */
1697 COSTS_N_INSNS (2), /* ldm_1st. */
1698 1, /* ldm_regs_per_insn_1st. */
1699 1, /* ldm_regs_per_insn_subsequent. */
1700 COSTS_N_INSNS (2), /* loadf. */
1701 COSTS_N_INSNS (3), /* loadd. */
1702 COSTS_N_INSNS (1), /* load_unaligned. */
1703 COSTS_N_INSNS (2), /* store. */
1704 COSTS_N_INSNS (3), /* strd. */
1705 COSTS_N_INSNS (2), /* stm_1st. */
1706 1, /* stm_regs_per_insn_1st. */
1707 1, /* stm_regs_per_insn_subsequent. */
1708 COSTS_N_INSNS (2), /* storef. */
1709 COSTS_N_INSNS (3), /* stored. */
1710 COSTS_N_INSNS (1), /* store_unaligned. */
1711 COSTS_N_INSNS (1), /* loadv. */
1712 COSTS_N_INSNS (1) /* storev. */
1713 },
1714 {
1715 /* FP SFmode */
1716 {
1717 COSTS_N_INSNS (7), /* div. */
1718 COSTS_N_INSNS (2), /* mult. */
1719 COSTS_N_INSNS (5), /* mult_addsub. */
1720 COSTS_N_INSNS (3), /* fma. */
1721 COSTS_N_INSNS (1), /* addsub. */
1722 0, /* fpconst. */
1723 0, /* neg. */
1724 0, /* compare. */
1725 0, /* widen. */
1726 0, /* narrow. */
1727 0, /* toint. */
1728 0, /* fromint. */
1729 0 /* roundint. */
1730 },
1731 /* FP DFmode */
1732 {
1733 COSTS_N_INSNS (15), /* div. */
1734 COSTS_N_INSNS (5), /* mult. */
1735 COSTS_N_INSNS (7), /* mult_addsub. */
1736 COSTS_N_INSNS (7), /* fma. */
1737 COSTS_N_INSNS (3), /* addsub. */
1738 0, /* fpconst. */
1739 0, /* neg. */
1740 0, /* compare. */
1741 0, /* widen. */
1742 0, /* narrow. */
1743 0, /* toint. */
1744 0, /* fromint. */
1745 0 /* roundint. */
1746 }
1747 },
1748 /* Vector */
1749 {
1750 COSTS_N_INSNS (1) /* alu. */
1751 }
1752 };
1753
1754 const struct tune_params arm_slowmul_tune =
1755 {
1756 &generic_extra_costs, /* Insn extra costs. */
1757 NULL, /* Sched adj cost. */
1758 arm_default_branch_cost,
1759 &arm_default_vec_cost,
1760 3, /* Constant limit. */
1761 5, /* Max cond insns. */
1762 8, /* Memset max inline. */
1763 1, /* Issue rate. */
1764 ARM_PREFETCH_NOT_BENEFICIAL,
1765 tune_params::PREF_CONST_POOL_TRUE,
1766 tune_params::PREF_LDRD_FALSE,
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1769 tune_params::DISPARAGE_FLAGS_NEITHER,
1770 tune_params::PREF_NEON_64_FALSE,
1771 tune_params::PREF_NEON_STRINGOPS_FALSE,
1772 tune_params::FUSE_NOTHING,
1773 tune_params::SCHED_AUTOPREF_OFF
1774 };
1775
1776 const struct tune_params arm_fastmul_tune =
1777 {
1778 &generic_extra_costs, /* Insn extra costs. */
1779 NULL, /* Sched adj cost. */
1780 arm_default_branch_cost,
1781 &arm_default_vec_cost,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL,
1787 tune_params::PREF_CONST_POOL_TRUE,
1788 tune_params::PREF_LDRD_FALSE,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER,
1792 tune_params::PREF_NEON_64_FALSE,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE,
1794 tune_params::FUSE_NOTHING,
1795 tune_params::SCHED_AUTOPREF_OFF
1796 };
1797
1798 /* StrongARM has early execution of branches, so a sequence that is worth
1799 skipping is shorter. Set max_insns_skipped to a lower value. */
1800
1801 const struct tune_params arm_strongarm_tune =
1802 {
1803 &generic_extra_costs, /* Insn extra costs. */
1804 NULL, /* Sched adj cost. */
1805 arm_default_branch_cost,
1806 &arm_default_vec_cost,
1807 1, /* Constant limit. */
1808 3, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 1, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 tune_params::PREF_CONST_POOL_TRUE,
1813 tune_params::PREF_LDRD_FALSE,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER,
1817 tune_params::PREF_NEON_64_FALSE,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE,
1819 tune_params::FUSE_NOTHING,
1820 tune_params::SCHED_AUTOPREF_OFF
1821 };
1822
1823 const struct tune_params arm_xscale_tune =
1824 {
1825 &generic_extra_costs, /* Insn extra costs. */
1826 xscale_sched_adjust_cost,
1827 arm_default_branch_cost,
1828 &arm_default_vec_cost,
1829 2, /* Constant limit. */
1830 3, /* Max cond insns. */
1831 8, /* Memset max inline. */
1832 1, /* Issue rate. */
1833 ARM_PREFETCH_NOT_BENEFICIAL,
1834 tune_params::PREF_CONST_POOL_TRUE,
1835 tune_params::PREF_LDRD_FALSE,
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1838 tune_params::DISPARAGE_FLAGS_NEITHER,
1839 tune_params::PREF_NEON_64_FALSE,
1840 tune_params::PREF_NEON_STRINGOPS_FALSE,
1841 tune_params::FUSE_NOTHING,
1842 tune_params::SCHED_AUTOPREF_OFF
1843 };
1844
1845 const struct tune_params arm_9e_tune =
1846 {
1847 &generic_extra_costs, /* Insn extra costs. */
1848 NULL, /* Sched adj cost. */
1849 arm_default_branch_cost,
1850 &arm_default_vec_cost,
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 8, /* Memset max inline. */
1854 1, /* Issue rate. */
1855 ARM_PREFETCH_NOT_BENEFICIAL,
1856 tune_params::PREF_CONST_POOL_TRUE,
1857 tune_params::PREF_LDRD_FALSE,
1858 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1859 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1860 tune_params::DISPARAGE_FLAGS_NEITHER,
1861 tune_params::PREF_NEON_64_FALSE,
1862 tune_params::PREF_NEON_STRINGOPS_FALSE,
1863 tune_params::FUSE_NOTHING,
1864 tune_params::SCHED_AUTOPREF_OFF
1865 };
1866
1867 const struct tune_params arm_marvell_pj4_tune =
1868 {
1869 &generic_extra_costs, /* Insn extra costs. */
1870 NULL, /* Sched adj cost. */
1871 arm_default_branch_cost,
1872 &arm_default_vec_cost,
1873 1, /* Constant limit. */
1874 5, /* Max cond insns. */
1875 8, /* Memset max inline. */
1876 2, /* Issue rate. */
1877 ARM_PREFETCH_NOT_BENEFICIAL,
1878 tune_params::PREF_CONST_POOL_TRUE,
1879 tune_params::PREF_LDRD_FALSE,
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1881 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1882 tune_params::DISPARAGE_FLAGS_NEITHER,
1883 tune_params::PREF_NEON_64_FALSE,
1884 tune_params::PREF_NEON_STRINGOPS_FALSE,
1885 tune_params::FUSE_NOTHING,
1886 tune_params::SCHED_AUTOPREF_OFF
1887 };
1888
1889 const struct tune_params arm_v6t2_tune =
1890 {
1891 &generic_extra_costs, /* Insn extra costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_FALSE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911
1912 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1913 const struct tune_params arm_cortex_tune =
1914 {
1915 &generic_extra_costs,
1916 NULL, /* Sched adj cost. */
1917 arm_default_branch_cost,
1918 &arm_default_vec_cost,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL,
1924 tune_params::PREF_CONST_POOL_FALSE,
1925 tune_params::PREF_LDRD_FALSE,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER,
1929 tune_params::PREF_NEON_64_FALSE,
1930 tune_params::PREF_NEON_STRINGOPS_FALSE,
1931 tune_params::FUSE_NOTHING,
1932 tune_params::SCHED_AUTOPREF_OFF
1933 };
1934
1935 const struct tune_params arm_cortex_a8_tune =
1936 {
1937 &cortexa8_extra_costs,
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 2, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_TRUE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_cortex_a7_tune =
1958 {
1959 &cortexa7_extra_costs,
1960 NULL, /* Sched adj cost. */
1961 arm_default_branch_cost,
1962 &arm_default_vec_cost,
1963 1, /* Constant limit. */
1964 5, /* Max cond insns. */
1965 8, /* Memset max inline. */
1966 2, /* Issue rate. */
1967 ARM_PREFETCH_NOT_BENEFICIAL,
1968 tune_params::PREF_CONST_POOL_FALSE,
1969 tune_params::PREF_LDRD_FALSE,
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1972 tune_params::DISPARAGE_FLAGS_NEITHER,
1973 tune_params::PREF_NEON_64_FALSE,
1974 tune_params::PREF_NEON_STRINGOPS_TRUE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_cortex_a15_tune =
1980 {
1981 &cortexa15_extra_costs,
1982 NULL, /* Sched adj cost. */
1983 arm_default_branch_cost,
1984 &arm_default_vec_cost,
1985 1, /* Constant limit. */
1986 2, /* Max cond insns. */
1987 8, /* Memset max inline. */
1988 3, /* Issue rate. */
1989 ARM_PREFETCH_NOT_BENEFICIAL,
1990 tune_params::PREF_CONST_POOL_FALSE,
1991 tune_params::PREF_LDRD_TRUE,
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1994 tune_params::DISPARAGE_FLAGS_ALL,
1995 tune_params::PREF_NEON_64_FALSE,
1996 tune_params::PREF_NEON_STRINGOPS_TRUE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_FULL
1999 };
2000
2001 const struct tune_params arm_cortex_a35_tune =
2002 {
2003 &cortexa53_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 arm_default_branch_cost,
2006 &arm_default_vec_cost,
2007 1, /* Constant limit. */
2008 5, /* Max cond insns. */
2009 8, /* Memset max inline. */
2010 1, /* Issue rate. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 tune_params::PREF_CONST_POOL_FALSE,
2013 tune_params::PREF_LDRD_FALSE,
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2016 tune_params::DISPARAGE_FLAGS_NEITHER,
2017 tune_params::PREF_NEON_64_FALSE,
2018 tune_params::PREF_NEON_STRINGOPS_TRUE,
2019 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_cortex_a53_tune =
2024 {
2025 &cortexa53_extra_costs,
2026 NULL, /* Sched adj cost. */
2027 arm_default_branch_cost,
2028 &arm_default_vec_cost,
2029 1, /* Constant limit. */
2030 5, /* Max cond insns. */
2031 8, /* Memset max inline. */
2032 2, /* Issue rate. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 tune_params::PREF_CONST_POOL_FALSE,
2035 tune_params::PREF_LDRD_FALSE,
2036 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2038 tune_params::DISPARAGE_FLAGS_NEITHER,
2039 tune_params::PREF_NEON_64_FALSE,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045 const struct tune_params arm_cortex_a57_tune =
2046 {
2047 &cortexa57_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 arm_default_branch_cost,
2050 &arm_default_vec_cost,
2051 1, /* Constant limit. */
2052 2, /* Max cond insns. */
2053 8, /* Memset max inline. */
2054 3, /* Issue rate. */
2055 ARM_PREFETCH_NOT_BENEFICIAL,
2056 tune_params::PREF_CONST_POOL_FALSE,
2057 tune_params::PREF_LDRD_TRUE,
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2060 tune_params::DISPARAGE_FLAGS_ALL,
2061 tune_params::PREF_NEON_64_FALSE,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE,
2063 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2064 tune_params::SCHED_AUTOPREF_FULL
2065 };
2066
2067 const struct tune_params arm_exynosm1_tune =
2068 {
2069 &exynosm1_extra_costs,
2070 NULL, /* Sched adj cost. */
2071 arm_default_branch_cost,
2072 &arm_default_vec_cost,
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 3, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL,
2078 tune_params::PREF_CONST_POOL_FALSE,
2079 tune_params::PREF_LDRD_TRUE,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL,
2083 tune_params::PREF_NEON_64_FALSE,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 tune_params::FUSE_NOTHING,
2086 tune_params::SCHED_AUTOPREF_OFF
2087 };
2088
2089 const struct tune_params arm_xgene1_tune =
2090 {
2091 &xgene1_extra_costs,
2092 NULL, /* Sched adj cost. */
2093 arm_default_branch_cost,
2094 &arm_default_vec_cost,
2095 1, /* Constant limit. */
2096 2, /* Max cond insns. */
2097 32, /* Memset max inline. */
2098 4, /* Issue rate. */
2099 ARM_PREFETCH_NOT_BENEFICIAL,
2100 tune_params::PREF_CONST_POOL_FALSE,
2101 tune_params::PREF_LDRD_TRUE,
2102 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2104 tune_params::DISPARAGE_FLAGS_ALL,
2105 tune_params::PREF_NEON_64_FALSE,
2106 tune_params::PREF_NEON_STRINGOPS_FALSE,
2107 tune_params::FUSE_NOTHING,
2108 tune_params::SCHED_AUTOPREF_OFF
2109 };
2110
2111 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2112 less appealing. Set max_insns_skipped to a low value. */
2113
2114 const struct tune_params arm_cortex_a5_tune =
2115 {
2116 &cortexa5_extra_costs,
2117 NULL, /* Sched adj cost. */
2118 arm_cortex_a5_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 1, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 2, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_FALSE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_NEITHER,
2130 tune_params::PREF_NEON_64_FALSE,
2131 tune_params::PREF_NEON_STRINGOPS_TRUE,
2132 tune_params::FUSE_NOTHING,
2133 tune_params::SCHED_AUTOPREF_OFF
2134 };
2135
2136 const struct tune_params arm_cortex_a9_tune =
2137 {
2138 &cortexa9_extra_costs,
2139 cortex_a9_sched_adjust_cost,
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 2, /* Issue rate. */
2146 ARM_PREFETCH_BENEFICIAL(4,32,32),
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_64_FALSE,
2153 tune_params::PREF_NEON_STRINGOPS_FALSE,
2154 tune_params::FUSE_NOTHING,
2155 tune_params::SCHED_AUTOPREF_OFF
2156 };
2157
2158 const struct tune_params arm_cortex_a12_tune =
2159 {
2160 &cortexa12_extra_costs,
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost, /* Vectorizer costs. */
2164 1, /* Constant limit. */
2165 2, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_TRUE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_ALL,
2174 tune_params::PREF_NEON_64_FALSE,
2175 tune_params::PREF_NEON_STRINGOPS_TRUE,
2176 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2177 tune_params::SCHED_AUTOPREF_OFF
2178 };
2179
2180 const struct tune_params arm_cortex_a73_tune =
2181 {
2182 &cortexa57_extra_costs,
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost, /* Vectorizer costs. */
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 2, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_64_FALSE,
2197 tune_params::PREF_NEON_STRINGOPS_TRUE,
2198 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2199 tune_params::SCHED_AUTOPREF_FULL
2200 };
2201
2202 /* armv7m tuning. On Cortex-M4 cores for example, MOVW and MOVT each take
2203 a single cycle, so materializing a constant with the pair costs two cycles.
2204 An LDR from the constant pool likewise takes two cycles to execute, but
2205 mildly increases pipelining opportunity (consecutive loads/stores can be
2206 pipelined together, saving one cycle), and may also improve icache
2207 utilisation. Hence we prefer the constant pool for such processors. */
2208
2209 const struct tune_params arm_v7m_tune =
2210 {
2211 &v7m_extra_costs,
2212 NULL, /* Sched adj cost. */
2213 arm_cortex_m_branch_cost,
2214 &arm_default_vec_cost,
2215 1, /* Constant limit. */
2216 2, /* Max cond insns. */
2217 8, /* Memset max inline. */
2218 1, /* Issue rate. */
2219 ARM_PREFETCH_NOT_BENEFICIAL,
2220 tune_params::PREF_CONST_POOL_TRUE,
2221 tune_params::PREF_LDRD_FALSE,
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2223 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2224 tune_params::DISPARAGE_FLAGS_NEITHER,
2225 tune_params::PREF_NEON_64_FALSE,
2226 tune_params::PREF_NEON_STRINGOPS_FALSE,
2227 tune_params::FUSE_NOTHING,
2228 tune_params::SCHED_AUTOPREF_OFF
2229 };
2230
2231 /* Cortex-M7 tuning. */
2232
2233 const struct tune_params arm_cortex_m7_tune =
2234 {
2235 &v7m_extra_costs,
2236 NULL, /* Sched adj cost. */
2237 arm_cortex_m7_branch_cost,
2238 &arm_default_vec_cost,
2239 0, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL,
2244 tune_params::PREF_CONST_POOL_TRUE,
2245 tune_params::PREF_LDRD_FALSE,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER,
2249 tune_params::PREF_NEON_64_FALSE,
2250 tune_params::PREF_NEON_STRINGOPS_FALSE,
2251 tune_params::FUSE_NOTHING,
2252 tune_params::SCHED_AUTOPREF_OFF
2253 };
2254
2255 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2256 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2257 cortex-m23. */
2258 const struct tune_params arm_v6m_tune =
2259 {
2260 &generic_extra_costs, /* Insn extra costs. */
2261 NULL, /* Sched adj cost. */
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost, /* Vectorizer costs. */
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 1, /* Issue rate. */
2268 ARM_PREFETCH_NOT_BENEFICIAL,
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_64_FALSE,
2275 tune_params::PREF_NEON_STRINGOPS_FALSE,
2276 tune_params::FUSE_NOTHING,
2277 tune_params::SCHED_AUTOPREF_OFF
2278 };
2279
2280 const struct tune_params arm_fa726te_tune =
2281 {
2282 &generic_extra_costs, /* Insn extra costs. */
2283 fa726te_sched_adjust_cost,
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost,
2286 1, /* Constant limit. */
2287 5, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_TRUE,
2292 tune_params::PREF_LDRD_FALSE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_NEITHER,
2296 tune_params::PREF_NEON_64_FALSE,
2297 tune_params::PREF_NEON_STRINGOPS_FALSE,
2298 tune_params::FUSE_NOTHING,
2299 tune_params::SCHED_AUTOPREF_OFF
2300 };
2301
2302 /* Auto-generated CPU, FPU and architecture tables. */
2303 #include "arm-cpu-data.h"
2304
2305 /* The name of the preprocessor macro to define for this architecture. PROFILE
2306 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2307 is thus chosen to be big enough to hold the longest architecture name. */
2308
2309 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2310
2311 /* Supported TLS relocations. */
2312
2313 enum tls_reloc {
2314 TLS_GD32,
2315 TLS_LDM32,
2316 TLS_LDO32,
2317 TLS_IE32,
2318 TLS_LE32,
2319 TLS_DESCSEQ /* GNU scheme */
2320 };
2321
2322 /* The maximum number of insns to be used when loading a constant. */
2323 inline static int
2324 arm_constant_limit (bool size_p)
2325 {
2326 return size_p ? 1 : current_tune->constant_limit;
2327 }
2328
2329 /* Emit an insn that's a simple single-set. Both the operands must be known
2330 to be valid. */
2331 inline static rtx_insn *
2332 emit_set_insn (rtx x, rtx y)
2333 {
2334 return emit_insn (gen_rtx_SET (x, y));
2335 }
2336
2337 /* Return the number of bits set in VALUE. */
2338 static unsigned
2339 bit_count (unsigned long value)
2340 {
2341 unsigned long count = 0;
2342
2343 while (value)
2344 {
2345 count++;
2346 value &= value - 1; /* Clear the least-significant set bit. */
2347 }
2348
2349 return count;
2350 }
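/* Illustration of the trick above: "value &= value - 1" clears the
   lowest set bit, so the loop iterates once per set bit rather than once
   per bit position; e.g. bit_count (0xf0) == 4 and bit_count (0) == 0. */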
2351
2352 /* Return the number of bits set in BMAP. */
2353 static unsigned
2354 bitmap_popcount (const sbitmap bmap)
2355 {
2356 unsigned int count = 0;
2357 unsigned int n = 0;
2358 sbitmap_iterator sbi;
2359
2360 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2361 count++;
2362 return count;
2363 }
2364
2365 typedef struct
2366 {
2367 machine_mode mode;
2368 const char *name;
2369 } arm_fixed_mode_set;
2370
2371 /* A small helper for setting fixed-point library libfuncs. */
2372
2373 static void
2374 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2375 const char *funcname, const char *modename,
2376 int num_suffix)
2377 {
2378 char buffer[50];
2379
2380 if (num_suffix == 0)
2381 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2382 else
2383 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2384
2385 set_optab_libfunc (optable, mode, buffer);
2386 }
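/* For example, with optable == add_optab, mode == E_QQmode,
   funcname == "add", modename == "qq" and num_suffix == 3 (as in the
   arm_init_libfuncs loop below), the name built here is "__gnu_addqq3",
   which becomes the libcall used for QQmode addition. */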
2387
2388 static void
2389 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2390 machine_mode from, const char *funcname,
2391 const char *toname, const char *fromname)
2392 {
2393 char buffer[50];
2394 const char *maybe_suffix_2 = "";
2395
2396 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2397 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2398 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2399 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2400 maybe_suffix_2 = "2";
2401
2402 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2403 maybe_suffix_2);
2404
2405 set_conv_libfunc (optable, to, from, buffer);
2406 }
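/* For example, a conversion from QQmode to HQmode (both signed fract
   modes) gets the name "__gnu_fractqqhq2", while a conversion from
   QQmode to SImode gets "__gnu_fractqqsi": SImode is not a fixed-point
   mode, so the "2" suffix is omitted, just as in libgcc's fixed-bit.h. */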
2407
2408 /* Set up library functions unique to ARM. */
2409
2410 static void
2411 arm_init_libfuncs (void)
2412 {
2413 /* For Linux, we have access to kernel support for atomic operations. */
2414 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2415 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2416
2417 /* There are no special library functions unless we are using the
2418 ARM BPABI. */
2419 if (!TARGET_BPABI)
2420 return;
2421
2422 /* The functions below are described in Section 4 of the "Run-Time
2423 ABI for the ARM architecture", Version 1.0. */
2424
2425 /* Double-precision floating-point arithmetic. Table 2. */
2426 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2427 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2428 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2429 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2430 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2431
2432 /* Double-precision comparisons. Table 3. */
2433 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2434 set_optab_libfunc (ne_optab, DFmode, NULL);
2435 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2436 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2437 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2438 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2439 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2440
2441 /* Single-precision floating-point arithmetic. Table 4. */
2442 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2443 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2444 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2445 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2446 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2447
2448 /* Single-precision comparisons. Table 5. */
2449 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2450 set_optab_libfunc (ne_optab, SFmode, NULL);
2451 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2452 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2453 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2454 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2455 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2456
2457 /* Floating-point to integer conversions. Table 6. */
2458 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2459 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2460 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2461 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2462 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2463 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2464 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2465 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2466
2467 /* Conversions between floating types. Table 7. */
2468 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2469 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2470
2471 /* Integer to floating-point conversions. Table 8. */
2472 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2473 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2474 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2475 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2476 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2477 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2478 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2479 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2480
2481 /* Long long. Table 9. */
2482 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2483 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2484 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2485 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2486 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2487 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2488 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2489 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2490
2491 /* Integer (32/32->32) division. \S 4.3.1. */
2492 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2493 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2494
2495 /* The divmod functions are designed so that they can be used for
2496 plain division, even though they return both the quotient and the
2497 remainder. The quotient is returned in the usual location (i.e.,
2498 r0 for SImode, {r0, r1} for DImode), just as would be expected
2499 for an ordinary division routine. Because the AAPCS calling
2500 conventions specify that all of { r0, r1, r2, r3 } are
2501 call-clobbered registers, there is no need to tell the compiler
2502 explicitly that those registers are clobbered by these
2503 routines. */
2504 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2505 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
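/* Concretely, per the run-time ABI these return the quotient and the
   remainder together: __aeabi_idivmod and __aeabi_uidivmod return the
   quotient in r0 and the remainder in r1, while __aeabi_ldivmod and
   __aeabi_uldivmod return the quotient in {r0, r1} and the remainder in
   {r2, r3}. A plain division simply ignores the remainder part. */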
2506
2507 /* For SImode division the ABI provides div-without-mod routines,
2508 which are faster. */
2509 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2510 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2511
2512 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2513 divmod libcalls instead. */
2514 set_optab_libfunc (smod_optab, DImode, NULL);
2515 set_optab_libfunc (umod_optab, DImode, NULL);
2516 set_optab_libfunc (smod_optab, SImode, NULL);
2517 set_optab_libfunc (umod_optab, SImode, NULL);
2518
2519 /* Half-precision float operations. The compiler handles all operations
2520 with NULL libfuncs by converting to SFmode. */
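/* In other words, an HFmode operation such as an addition is carried out
   by widening both operands to SFmode with the __gnu_h2f_* helper
   registered below, performing the operation in SFmode, and narrowing the
   result back with __gnu_f2h_*; only the conversions need real libcalls. */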
2521 switch (arm_fp16_format)
2522 {
2523 case ARM_FP16_FORMAT_IEEE:
2524 case ARM_FP16_FORMAT_ALTERNATIVE:
2525
2526 /* Conversions. */
2527 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2528 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2529 ? "__gnu_f2h_ieee"
2530 : "__gnu_f2h_alternative"));
2531 set_conv_libfunc (sext_optab, SFmode, HFmode,
2532 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2533 ? "__gnu_h2f_ieee"
2534 : "__gnu_h2f_alternative"));
2535
2536 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_d2h_ieee"
2539 : "__gnu_d2h_alternative"));
2540
2541 /* Arithmetic. */
2542 set_optab_libfunc (add_optab, HFmode, NULL);
2543 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2544 set_optab_libfunc (smul_optab, HFmode, NULL);
2545 set_optab_libfunc (neg_optab, HFmode, NULL);
2546 set_optab_libfunc (sub_optab, HFmode, NULL);
2547
2548 /* Comparisons. */
2549 set_optab_libfunc (eq_optab, HFmode, NULL);
2550 set_optab_libfunc (ne_optab, HFmode, NULL);
2551 set_optab_libfunc (lt_optab, HFmode, NULL);
2552 set_optab_libfunc (le_optab, HFmode, NULL);
2553 set_optab_libfunc (ge_optab, HFmode, NULL);
2554 set_optab_libfunc (gt_optab, HFmode, NULL);
2555 set_optab_libfunc (unord_optab, HFmode, NULL);
2556 break;
2557
2558 default:
2559 break;
2560 }
2561
2562 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2563 {
2564 const arm_fixed_mode_set fixed_arith_modes[] =
2565 {
2566 { E_QQmode, "qq" },
2567 { E_UQQmode, "uqq" },
2568 { E_HQmode, "hq" },
2569 { E_UHQmode, "uhq" },
2570 { E_SQmode, "sq" },
2571 { E_USQmode, "usq" },
2572 { E_DQmode, "dq" },
2573 { E_UDQmode, "udq" },
2574 { E_TQmode, "tq" },
2575 { E_UTQmode, "utq" },
2576 { E_HAmode, "ha" },
2577 { E_UHAmode, "uha" },
2578 { E_SAmode, "sa" },
2579 { E_USAmode, "usa" },
2580 { E_DAmode, "da" },
2581 { E_UDAmode, "uda" },
2582 { E_TAmode, "ta" },
2583 { E_UTAmode, "uta" }
2584 };
2585 const arm_fixed_mode_set fixed_conv_modes[] =
2586 {
2587 { E_QQmode, "qq" },
2588 { E_UQQmode, "uqq" },
2589 { E_HQmode, "hq" },
2590 { E_UHQmode, "uhq" },
2591 { E_SQmode, "sq" },
2592 { E_USQmode, "usq" },
2593 { E_DQmode, "dq" },
2594 { E_UDQmode, "udq" },
2595 { E_TQmode, "tq" },
2596 { E_UTQmode, "utq" },
2597 { E_HAmode, "ha" },
2598 { E_UHAmode, "uha" },
2599 { E_SAmode, "sa" },
2600 { E_USAmode, "usa" },
2601 { E_DAmode, "da" },
2602 { E_UDAmode, "uda" },
2603 { E_TAmode, "ta" },
2604 { E_UTAmode, "uta" },
2605 { E_QImode, "qi" },
2606 { E_HImode, "hi" },
2607 { E_SImode, "si" },
2608 { E_DImode, "di" },
2609 { E_TImode, "ti" },
2610 { E_SFmode, "sf" },
2611 { E_DFmode, "df" }
2612 };
2613 unsigned int i, j;
2614
2615 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2616 {
2617 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2618 "add", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2620 "ssadd", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2622 "usadd", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2624 "sub", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2626 "sssub", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2628 "ussub", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2630 "mul", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2632 "ssmul", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2634 "usmul", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2636 "div", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2638 "udiv", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2640 "ssdiv", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2642 "usdiv", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2644 "neg", fixed_arith_modes[i].name, 2);
2645 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2646 "ssneg", fixed_arith_modes[i].name, 2);
2647 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2648 "usneg", fixed_arith_modes[i].name, 2);
2649 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2650 "ashl", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2652 "ashr", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2654 "lshr", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2656 "ssashl", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2658 "usashl", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2660 "cmp", fixed_arith_modes[i].name, 2);
2661 }
2662
2663 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2664 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2665 {
2666 if (i == j
2667 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2668 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2669 continue;
2670
2671 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2672 fixed_conv_modes[j].mode, "fract",
2673 fixed_conv_modes[i].name,
2674 fixed_conv_modes[j].name);
2675 arm_set_fixed_conv_libfunc (satfract_optab,
2676 fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "satfract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (fractuns_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "fractuns",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (satfractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "satfractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2690 }
2691 }
2692
2693 if (TARGET_AAPCS_BASED)
2694 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2695 }
2696
2697 /* On AAPCS systems, this is the "struct __va_list". */
2698 static GTY(()) tree va_list_type;
2699
2700 /* Return the type to use as __builtin_va_list. */
2701 static tree
2702 arm_build_builtin_va_list (void)
2703 {
2704 tree va_list_name;
2705 tree ap_field;
2706
2707 if (!TARGET_AAPCS_BASED)
2708 return std_build_builtin_va_list ();
2709
2710 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2711 defined as:
2712
2713 struct __va_list
2714 {
2715 void *__ap;
2716 };
2717
2718 The C Library ABI further reinforces this definition in \S
2719 4.1.
2720
2721 We must follow this definition exactly. The structure tag
2722 name is visible in C++ mangled names, and thus forms a part
2723 of the ABI. The field name may be used by people who
2724 #include <stdarg.h>. */
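/* For instance, on an AAPCS target a declaration such as
   "void f (va_list);" mangles as _Z1fSt9__va_list, because "__va_list"
   is treated as if it were declared in namespace std; renaming the tag
   would therefore silently change the C++ ABI. */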
2725 /* Create the type. */
2726 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2727 /* Give it the required name. */
2728 va_list_name = build_decl (BUILTINS_LOCATION,
2729 TYPE_DECL,
2730 get_identifier ("__va_list"),
2731 va_list_type);
2732 DECL_ARTIFICIAL (va_list_name) = 1;
2733 TYPE_NAME (va_list_type) = va_list_name;
2734 TYPE_STUB_DECL (va_list_type) = va_list_name;
2735 /* Create the __ap field. */
2736 ap_field = build_decl (BUILTINS_LOCATION,
2737 FIELD_DECL,
2738 get_identifier ("__ap"),
2739 ptr_type_node);
2740 DECL_ARTIFICIAL (ap_field) = 1;
2741 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2742 TYPE_FIELDS (va_list_type) = ap_field;
2743 /* Compute its layout. */
2744 layout_type (va_list_type);
2745
2746 return va_list_type;
2747 }
2748
2749 /* Return an expression of type "void *" pointing to the next
2750 available argument in a variable-argument list. VALIST is the
2751 user-level va_list object, of type __builtin_va_list. */
2752 static tree
2753 arm_extract_valist_ptr (tree valist)
2754 {
2755 if (TREE_TYPE (valist) == error_mark_node)
2756 return error_mark_node;
2757
2758 /* On an AAPCS target, the pointer is stored within "struct
2759 va_list". */
2760 if (TARGET_AAPCS_BASED)
2761 {
2762 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2763 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2764 valist, ap_field, NULL_TREE);
2765 }
2766
2767 return valist;
2768 }
2769
2770 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2771 static void
2772 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2773 {
2774 valist = arm_extract_valist_ptr (valist);
2775 std_expand_builtin_va_start (valist, nextarg);
2776 }
2777
2778 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2779 static tree
2780 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2781 gimple_seq *post_p)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2785 }
2786
2787 /* Check any incompatible options that the user has specified. */
2788 static void
2789 arm_option_check_internal (struct gcc_options *opts)
2790 {
2791 int flags = opts->x_target_flags;
2792
2793 /* iWMMXt and NEON are incompatible. */
2794 if (TARGET_IWMMXT
2795 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2796 error ("iWMMXt and NEON are incompatible");
2797
2798 /* Make sure that the processor choice does not conflict with any of the
2799 other command line choices. */
2800 if (TARGET_ARM_P (flags)
2801 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2802 error ("target CPU does not support ARM mode");
2803
2804 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2805 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2806 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2807
2808 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2809 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2810
2811 /* If this target is normally configured to use APCS frames, warn if they
2812 are turned off and debugging is turned on. */
2813 if (TARGET_ARM_P (flags)
2814 && write_symbols != NO_DEBUG
2815 && !TARGET_APCS_FRAME
2816 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2817 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2818
2819 /* iWMMXt unsupported under Thumb mode. */
2820 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2821 error ("iWMMXt unsupported under Thumb mode");
2822
2823 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2824 error ("can not use -mtp=cp15 with 16-bit Thumb");
2825
2826 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2827 {
2828 error ("RTP PIC is incompatible with Thumb");
2829 flag_pic = 0;
2830 }
2831
2832 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2833 with MOVT. */
2834 if ((target_pure_code || target_slow_flash_data)
2835 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2836 {
2837 const char *flag = (target_pure_code ? "-mpure-code" :
2838 "-mslow-flash-data");
2839 error ("%s only supports non-pic code on M-profile targets with the "
2840 "MOVT instruction", flag);
2841 }
2842
2843 }
2844
2845 /* Recompute the global settings depending on target attribute options. */
2846
2847 static void
2848 arm_option_params_internal (void)
2849 {
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2852 if (TARGET_THUMB1)
2853 {
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
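/* (For reference: Thumb-1 LDRB takes an unscaled 5-bit offset (0..31
   bytes), LDRH a 5-bit offset scaled by two (0..62) and LDR a 5-bit
   offset scaled by four (0..124), which is where the 5-, 6- and 7-bit
   figures above come from.) */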
2859 targetm.min_anchor_offset = 0;
2860 targetm.max_anchor_offset = 127;
2861 }
2862 else if (TARGET_THUMB2)
2863 {
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
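/* (That is, 248 + 1 + 4095 = 4344 bytes, and 4344 = 543 * 8.) */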
2867 targetm.min_anchor_offset = -248;
2868 targetm.max_anchor_offset = 4095;
2869 }
2870 else
2871 {
2872 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2873 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2874 }
2875
2876 /* Increase the number of conditional instructions with -Os. */
2877 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2878
2879 /* For THUMB2, we limit the conditional sequence to one IT block. */
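/* An IT instruction can predicate at most four following instructions,
   and MAX_INSN_PER_IT_BLOCK (defined in arm.h) caps the conditional
   sequence accordingly. */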
2880 if (TARGET_THUMB2)
2881 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2882 }
2883
2884 /* True if -mflip-thumb should next add an attribute for the default
2885 mode, false if it should next add an attribute for the opposite mode. */
2886 static GTY(()) bool thumb_flipper;
2887
2888 /* Options after initial target override. */
2889 static GTY(()) tree init_optimize;
2890
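/* Choose a default value for -falign-functions when the user has not set
   one: 2 for Thumb code when optimizing for size (Thumb instructions need
   only 2-byte alignment), otherwise 4. */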
2891 static void
2892 arm_override_options_after_change_1 (struct gcc_options *opts)
2893 {
2894 if (opts->x_align_functions <= 0)
2895 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2896 && opts->x_optimize_size ? 2 : 4;
2897 }
2898
2899 /* Implement targetm.override_options_after_change. */
2900
2901 static void
2902 arm_override_options_after_change (void)
2903 {
2904 arm_configure_build_target (&arm_active_target,
2905 TREE_TARGET_OPTION (target_option_default_node),
2906 &global_options_set, false);
2907
2908 arm_override_options_after_change_1 (&global_options);
2909 }
2910
2911 /* Implement TARGET_OPTION_SAVE. */
2912 static void
2913 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2914 {
2915 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2916 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2917 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2918 }
2919
2920 /* Implement TARGET_OPTION_RESTORE. */
2921 static void
2922 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2923 {
2924 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2925 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2926 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2927 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2928 false);
2929 }
2930
2931 /* Reset options between modes that the user has specified. */
2932 static void
2933 arm_option_override_internal (struct gcc_options *opts,
2934 struct gcc_options *opts_set)
2935 {
2936 arm_override_options_after_change_1 (opts);
2937
2938 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2939 {
2940 /* The default is to enable interworking, so this warning message would
2941 be confusing to users who have just compiled with, e.g., -march=armv3. */
2942 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2943 opts->x_target_flags &= ~MASK_INTERWORK;
2944 }
2945
2946 if (TARGET_THUMB_P (opts->x_target_flags)
2947 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 warning (0, "target CPU does not support THUMB instructions");
2950 opts->x_target_flags &= ~MASK_THUMB;
2951 }
2952
2953 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2954 {
2955 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2956 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 }
2958
2959 /* Callee super interworking implies thumb interworking. Adding
2960 this to the flags here simplifies the logic elsewhere. */
2961 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2962 opts->x_target_flags |= MASK_INTERWORK;
2963
2964 /* Need to remember initial values so combinations of options like
2965 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2966 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2967
2968 if (! opts_set->x_arm_restrict_it)
2969 opts->x_arm_restrict_it = arm_arch8;
2970
2971 /* ARM execution state and M profile don't have [restrict] IT. */
2972 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2973 opts->x_arm_restrict_it = 0;
2974
2975 /* Enable -munaligned-access by default for
2976 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2977 i.e. Thumb2 and ARM state only.
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-based processors.
2980
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors
2984 - ARMv8-M Baseline processors. */
2985
2986 if (! opts_set->x_unaligned_access)
2987 {
2988 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2989 && arm_arch6 && (arm_arch_notm || arm_arch7));
2990 }
2991 else if (opts->x_unaligned_access == 1
2992 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2993 {
2994 warning (0, "target CPU does not support unaligned accesses");
2995 opts->x_unaligned_access = 0;
2996 }
2997
2998 /* Don't warn since it's on by default in -O2. */
2999 if (TARGET_THUMB1_P (opts->x_target_flags))
3000 opts->x_flag_schedule_insns = 0;
3001 else
3002 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3003
3004 /* Disable shrink-wrap when optimizing function for size, since it tends to
3005 generate additional returns. */
3006 if (optimize_function_for_size_p (cfun)
3007 && TARGET_THUMB2_P (opts->x_target_flags))
3008 opts->x_flag_shrink_wrap = false;
3009 else
3010 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3011
3012 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3013 - epilogue_insns - does not accurately model the corresponding insns
3014 emitted in the asm file. In particular, see the comment in thumb_exit
3015 'Find out how many of the (return) argument registers we can corrupt'.
3016 As a consequence, the epilogue may clobber registers without fipa-ra
3017 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3018 TODO: Accurately model clobbers for epilogue_insns and reenable
3019 fipa-ra. */
3020 if (TARGET_THUMB1_P (opts->x_target_flags))
3021 opts->x_flag_ipa_ra = 0;
3022 else
3023 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3024
3025 /* Thumb2 inline assembly code should always use unified syntax.
3026 This will apply to ARM and Thumb1 eventually. */
3027 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3028
3029 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3030 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3031 #endif
3032 }
3033
3034 static sbitmap isa_all_fpubits;
3035 static sbitmap isa_quirkbits;
3036
3037 /* Configure a build target TARGET from the user-specified options OPTS and
3038 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3039 architecture have been specified, but the two are not identical. */
3040 void
3041 arm_configure_build_target (struct arm_build_target *target,
3042 struct cl_target_option *opts,
3043 struct gcc_options *opts_set,
3044 bool warn_compatible)
3045 {
3046 const cpu_option *arm_selected_tune = NULL;
3047 const arch_option *arm_selected_arch = NULL;
3048 const cpu_option *arm_selected_cpu = NULL;
3049 const arm_fpu_desc *arm_selected_fpu = NULL;
3050 const char *tune_opts = NULL;
3051 const char *arch_opts = NULL;
3052 const char *cpu_opts = NULL;
3053
3054 bitmap_clear (target->isa);
3055 target->core_name = NULL;
3056 target->arch_name = NULL;
3057
3058 if (opts_set->x_arm_arch_string)
3059 {
3060 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3061 "-march",
3062 opts->x_arm_arch_string);
3063 arch_opts = strchr (opts->x_arm_arch_string, '+');
3064 }
3065
3066 if (opts_set->x_arm_cpu_string)
3067 {
3068 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3069 opts->x_arm_cpu_string);
3070 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3071 arm_selected_tune = arm_selected_cpu;
3072 /* If taking the tuning from -mcpu, we don't need to rescan the
3073 options for tuning. */
3074 }
3075
3076 if (opts_set->x_arm_tune_string)
3077 {
3078 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3079 opts->x_arm_tune_string);
3080 tune_opts = strchr (opts->x_arm_tune_string, '+');
3081 }
3082
3083 if (arm_selected_arch)
3084 {
3085 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3086 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3087 arch_opts);
3088
3089 if (arm_selected_cpu)
3090 {
3091 auto_sbitmap cpu_isa (isa_num_bits);
3092 auto_sbitmap isa_delta (isa_num_bits);
3093
3094 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3095 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3096 cpu_opts);
3097 bitmap_xor (isa_delta, cpu_isa, target->isa);
3098 /* Ignore any bits that are quirk bits. */
3099 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3100 /* Ignore (for now) any bits that might be set by -mfpu. */
3101 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3102
3103 if (!bitmap_empty_p (isa_delta))
3104 {
3105 if (warn_compatible)
3106 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3107 arm_selected_cpu->common.name,
3108 arm_selected_arch->common.name);
3109 /* -march wins for code generation.
3110 -mcpu wins for default tuning. */
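/* For instance, "-march=armv7-a -mcpu=cortex-a53" leaves ARMv8-only
   feature bits in isa_delta, so the warning above is emitted; code is
   then generated for ARMv7-A while tuning follows the Cortex-A53. */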
3111 if (!arm_selected_tune)
3112 arm_selected_tune = arm_selected_cpu;
3113
3114 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3115 target->arch_name = arm_selected_arch->common.name;
3116 }
3117 else
3118 {
3119 /* Architecture and CPU are essentially the same.
3120 Prefer the CPU setting. */
3121 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3122 target->core_name = arm_selected_cpu->common.name;
3123 /* Copy the CPU's capabilities, so that we inherit the
3124 appropriate extensions and quirks. */
3125 bitmap_copy (target->isa, cpu_isa);
3126 }
3127 }
3128 else
3129 {
3130 /* Pick a CPU based on the architecture. */
3131 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3132 target->arch_name = arm_selected_arch->common.name;
3133 /* Note: target->core_name is left unset in this path. */
3134 }
3135 }
3136 else if (arm_selected_cpu)
3137 {
3138 target->core_name = arm_selected_cpu->common.name;
3139 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3140 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3141 cpu_opts);
3142 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3143 }
3144 /* If the user did not specify a processor or architecture, choose
3145 one for them. */
3146 else
3147 {
3148 const cpu_option *sel;
3149 auto_sbitmap sought_isa (isa_num_bits);
3150 bitmap_clear (sought_isa);
3151 auto_sbitmap default_isa (isa_num_bits);
3152
3153 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3154 TARGET_CPU_DEFAULT);
3155 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3156 gcc_assert (arm_selected_cpu->common.name);
3157
3158 /* RWE: All of the selection logic below (to the end of this
3159 'if' clause) looks somewhat suspect. It appears to be mostly
3160 there to support forcing thumb support when the default CPU
3161 does not have thumb (somewhat dubious in terms of what the
3162 user might be expecting). I think it should be removed once
3163 support for the pre-thumb era cores is removed. */
3164 sel = arm_selected_cpu;
3165 arm_initialize_isa (default_isa, sel->common.isa_bits);
3166 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3167 cpu_opts);
3168
3169 /* Now check to see if the user has specified any command line
3170 switches that require certain abilities from the cpu. */
3171
3172 if (TARGET_INTERWORK || TARGET_THUMB)
3173 {
3174 bitmap_set_bit (sought_isa, isa_bit_thumb);
3175 bitmap_set_bit (sought_isa, isa_bit_mode32);
3176
3177 /* There are no ARM processors that support both APCS-26 and
3178 interworking. Therefore we forcibly remove MODE26 from
3179 the isa features here (if it was set), so that the
3180 search below will always be able to find a compatible
3181 processor. */
3182 bitmap_clear_bit (default_isa, isa_bit_mode26);
3183 }
3184
3185 /* If there are such requirements and the default CPU does not
3186 satisfy them, we need to run over the complete list of
3187 cores looking for one that is satisfactory. */
3188 if (!bitmap_empty_p (sought_isa)
3189 && !bitmap_subset_p (sought_isa, default_isa))
3190 {
3191 auto_sbitmap candidate_isa (isa_num_bits);
3192 /* We're only interested in a CPU with at least the
3193 capabilities of the default CPU and the required
3194 additional features. */
3195 bitmap_ior (default_isa, default_isa, sought_isa);
3196
3197 /* Try to locate a CPU type that supports all of the abilities
3198 of the default CPU, plus the extra abilities requested by
3199 the user. */
3200 for (sel = all_cores; sel->common.name != NULL; sel++)
3201 {
3202 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3203 /* An exact match? */
3204 if (bitmap_equal_p (default_isa, candidate_isa))
3205 break;
3206 }
3207
3208 if (sel->common.name == NULL)
3209 {
3210 unsigned current_bit_count = isa_num_bits;
3211 const cpu_option *best_fit = NULL;
3212
3213 /* Ideally we would like to issue an error message here
3214 saying that it was not possible to find a CPU compatible
3215 with the default CPU, but which also supports the command
3216 line options specified by the programmer, and so they
3217 ought to use the -mcpu=<name> command line option to
3218 override the default CPU type.
3219
3220 If we cannot find a CPU that has exactly the
3221 characteristics of the default CPU and the given
3222 command line options we scan the array again looking
3223 for a best match. The best match must have at least
3224 the capabilities of the perfect match. */
3225 for (sel = all_cores; sel->common.name != NULL; sel++)
3226 {
3227 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3228
3229 if (bitmap_subset_p (default_isa, candidate_isa))
3230 {
3231 unsigned count;
3232
3233 bitmap_and_compl (candidate_isa, candidate_isa,
3234 default_isa);
3235 count = bitmap_popcount (candidate_isa);
3236
3237 if (count < current_bit_count)
3238 {
3239 best_fit = sel;
3240 current_bit_count = count;
3241 }
3242 }
3243 }
3244
3245 gcc_assert (best_fit);
3246 sel = best_fit;
3247 }
3248 arm_selected_cpu = sel;
3249 }
3250
3251 /* Now we know the CPU, we can finally initialize the target
3252 structure. */
3253 target->core_name = arm_selected_cpu->common.name;
3254 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3255 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3256 cpu_opts);
3257 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3258 }
3259
3260 gcc_assert (arm_selected_cpu);
3261 gcc_assert (arm_selected_arch);
3262
3263 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3264 {
3265 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3266 auto_sbitmap fpu_bits (isa_num_bits);
3267
3268 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3269 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3270 bitmap_ior (target->isa, target->isa, fpu_bits);
3271 }
3272
3273 if (!arm_selected_tune)
3274 arm_selected_tune = arm_selected_cpu;
3275 else /* Validate the features passed to -mtune. */
3276 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3277
3278 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3279
3280 /* Finish initializing the target structure. */
3281 target->arch_pp_name = arm_selected_arch->arch;
3282 target->base_arch = arm_selected_arch->base_arch;
3283 target->profile = arm_selected_arch->profile;
3284
3285 target->tune_flags = tune_data->tune_flags;
3286 target->tune = tune_data->tune;
3287 target->tune_core = tune_data->scheduler;
3288 }
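
/* Worked example (illustrative only, inferred from the logic above; not
   part of the original comments): a command line such as
       gcc -mcpu=cortex-a8 -march=armv7-m ...
   should trigger the "conflicts" warning above, after which -march=armv7-m
   drives code generation while -mcpu=cortex-a8 still selects the default
   tuning, as described in the comments in arm_configure_build_target.  */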
3289
3290 /* Fix up any incompatible options that the user has specified. */
3291 static void
3292 arm_option_override (void)
3293 {
3294 static const enum isa_feature fpu_bitlist[]
3295 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3296 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3297 cl_target_option opts;
3298
3299 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3300 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3301
3302 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3303 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3304
3305 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3306
3307 if (!global_options_set.x_arm_fpu_index)
3308 {
3309 bool ok;
3310 int fpu_index;
3311
3312 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3313 CL_TARGET);
3314 gcc_assert (ok);
3315 arm_fpu_index = (enum fpu_type) fpu_index;
3316 }
3317
3318 cl_target_option_save (&opts, &global_options);
3319 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3320 true);
3321
3322 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3323 SUBTARGET_OVERRIDE_OPTIONS;
3324 #endif
3325
3326 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3327 arm_base_arch = arm_active_target.base_arch;
3328
3329 arm_tune = arm_active_target.tune_core;
3330 tune_flags = arm_active_target.tune_flags;
3331 current_tune = arm_active_target.tune;
3332
3333 /* TBD: Dwarf info for apcs frame is not handled yet. */
3334 if (TARGET_APCS_FRAME)
3335 flag_shrink_wrap = false;
3336
3337 /* BPABI targets use linker tricks to allow interworking on cores
3338 without thumb support. */
3339 if (TARGET_INTERWORK
3340 && !TARGET_BPABI
3341 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3342 {
3343 warning (0, "target CPU does not support interworking" );
3344 target_flags &= ~MASK_INTERWORK;
3345 }
3346
3347 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3348 {
3349 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3350 target_flags |= MASK_APCS_FRAME;
3351 }
3352
3353 if (TARGET_POKE_FUNCTION_NAME)
3354 target_flags |= MASK_APCS_FRAME;
3355
3356 if (TARGET_APCS_REENT && flag_pic)
3357 error ("-fpic and -mapcs-reent are incompatible");
3358
3359 if (TARGET_APCS_REENT)
3360 warning (0, "APCS reentrant code not supported. Ignored");
3361
3362 /* Initialize boolean versions of the architectural flags, for use
3363 in the arm.md file. */
3364 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3365 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3366 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3367 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3368 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3369 arm_arch5te = arm_arch5e
3370 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3371 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3372 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3373 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3374 arm_arch6m = arm_arch6 && !arm_arch_notm;
3375 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3376 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3377 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3378 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3379 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3380 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3381 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3382 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3383 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3384 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3385 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3386 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3387 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3388 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3389 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3390 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3391 if (arm_fp16_inst)
3392 {
3393 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3394 error ("selected fp16 options are incompatible");
3395 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3396 }
3397
3398
3399 /* Set up some tuning parameters. */
3400 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3401 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3402 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3403 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3404 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3405 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3406
3407 /* And finally, set up some quirks. */
3408 arm_arch_no_volatile_ce
3409 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3410 arm_arch6kz
3411 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3412
3413 /* V5 code we generate is completely interworking capable, so we turn off
3414 TARGET_INTERWORK here to avoid many tests later on. */
3415
3416 /* XXX However, we must pass the right pre-processor defines to CPP
3417 or GLD can get confused. This is a hack. */
3418 if (TARGET_INTERWORK)
3419 arm_cpp_interwork = 1;
3420
3421 if (arm_arch5)
3422 target_flags &= ~MASK_INTERWORK;
3423
3424 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3425 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3426
3427 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3428 error ("iwmmxt abi requires an iwmmxt capable cpu");
3429
3430 /* If soft-float is specified then don't use FPU. */
3431 if (TARGET_SOFT_FLOAT)
3432 arm_fpu_attr = FPU_NONE;
3433 else
3434 arm_fpu_attr = FPU_VFP;
3435
3436 if (TARGET_AAPCS_BASED)
3437 {
3438 if (TARGET_CALLER_INTERWORKING)
3439 error ("AAPCS does not support -mcaller-super-interworking");
3440 else
3441 if (TARGET_CALLEE_INTERWORKING)
3442 error ("AAPCS does not support -mcallee-super-interworking");
3443 }
3444
3445 /* __fp16 support currently assumes the core has ldrh. */
3446 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3447 sorry ("__fp16 and no ldrh");
3448
3449 if (TARGET_AAPCS_BASED)
3450 {
3451 if (arm_abi == ARM_ABI_IWMMXT)
3452 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3453 else if (TARGET_HARD_FLOAT_ABI)
3454 {
3455 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3456 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3457 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3458 }
3459 else
3460 arm_pcs_default = ARM_PCS_AAPCS;
3461 }
3462 else
3463 {
3464 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3465 sorry ("-mfloat-abi=hard and VFP");
3466
3467 if (arm_abi == ARM_ABI_APCS)
3468 arm_pcs_default = ARM_PCS_APCS;
3469 else
3470 arm_pcs_default = ARM_PCS_ATPCS;
3471 }
3472
3473 /* For arm2/3 there is no need to do any scheduling if we are doing
3474 software floating-point. */
3475 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3476 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3477
3478 /* Use the cp15 method if it is available. */
3479 if (target_thread_pointer == TP_AUTO)
3480 {
3481 if (arm_arch6k && !TARGET_THUMB1)
3482 target_thread_pointer = TP_CP15;
3483 else
3484 target_thread_pointer = TP_SOFT;
3485 }
3486
3487 /* Override the default structure alignment for AAPCS ABI. */
3488 if (!global_options_set.x_arm_structure_size_boundary)
3489 {
3490 if (TARGET_AAPCS_BASED)
3491 arm_structure_size_boundary = 8;
3492 }
3493 else
3494 {
3495 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3496
3497 if (arm_structure_size_boundary != 8
3498 && arm_structure_size_boundary != 32
3499 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3500 {
3501 if (ARM_DOUBLEWORD_ALIGN)
3502 warning (0,
3503 "structure size boundary can only be set to 8, 32 or 64");
3504 else
3505 warning (0, "structure size boundary can only be set to 8 or 32");
3506 arm_structure_size_boundary
3507 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3508 }
3509 }
3510
3511 if (TARGET_VXWORKS_RTP)
3512 {
3513 if (!global_options_set.x_arm_pic_data_is_text_relative)
3514 arm_pic_data_is_text_relative = 0;
3515 }
3516 else if (flag_pic
3517 && !arm_pic_data_is_text_relative
3518 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3519 /* When text & data segments don't have a fixed displacement, the
3520 intended use is with a single, read only, pic base register.
3521 Unless the user explicitly requested not to do that, set
3522 it. */
3523 target_flags |= MASK_SINGLE_PIC_BASE;
3524
3525 /* If stack checking is disabled, we can use r10 as the PIC register,
3526 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3527 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3528 {
3529 if (TARGET_VXWORKS_RTP)
3530 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3531 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3532 }
3533
3534 if (flag_pic && TARGET_VXWORKS_RTP)
3535 arm_pic_register = 9;
3536
3537 if (arm_pic_register_string != NULL)
3538 {
3539 int pic_register = decode_reg_name (arm_pic_register_string);
3540
3541 if (!flag_pic)
3542 warning (0, "-mpic-register= is useless without -fpic");
3543
3544 /* Prevent the user from choosing an obviously stupid PIC register. */
3545 else if (pic_register < 0 || call_used_regs[pic_register]
3546 || pic_register == HARD_FRAME_POINTER_REGNUM
3547 || pic_register == STACK_POINTER_REGNUM
3548 || pic_register >= PC_REGNUM
3549 || (TARGET_VXWORKS_RTP
3550 && (unsigned int) pic_register != arm_pic_register))
3551 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3552 else
3553 arm_pic_register = pic_register;
3554 }
3555
3556 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3557 if (fix_cm3_ldrd == 2)
3558 {
3559 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3560 fix_cm3_ldrd = 1;
3561 else
3562 fix_cm3_ldrd = 0;
3563 }
3564
3565 /* Hot/Cold partitioning is not currently supported, since we can't
3566 handle literal pool placement in that case. */
3567 if (flag_reorder_blocks_and_partition)
3568 {
3569 inform (input_location,
3570 "-freorder-blocks-and-partition not supported on this architecture");
3571 flag_reorder_blocks_and_partition = 0;
3572 flag_reorder_blocks = 1;
3573 }
3574
3575 if (flag_pic)
3576 /* Hoisting PIC address calculations more aggressively provides a small,
3577 but measurable, size reduction for PIC code. Therefore, we decrease
3578 the bar for unrestricted expression hoisting to the cost of PIC address
3579 calculation, which is 2 instructions. */
3580 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3581 global_options.x_param_values,
3582 global_options_set.x_param_values);
3583
3584 /* ARM EABI defaults to strict volatile bitfields. */
3585 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3586 && abi_version_at_least(2))
3587 flag_strict_volatile_bitfields = 1;
3588
3589 /* Enable software prefetching at -O3 for CPUs that have prefetch and
3590 for which we have deemed it beneficial (signified by setting
3591 prefetch.num_slots to 1 or more). */
3592 if (flag_prefetch_loop_arrays < 0
3593 && HAVE_prefetch
3594 && optimize >= 3
3595 && current_tune->prefetch.num_slots > 0)
3596 flag_prefetch_loop_arrays = 1;
3597
3598 /* Set up parameters to be used in prefetching algorithm. Do not
3599 override the defaults unless we are tuning for a core we have
3600 researched values for. */
3601 if (current_tune->prefetch.num_slots > 0)
3602 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3603 current_tune->prefetch.num_slots,
3604 global_options.x_param_values,
3605 global_options_set.x_param_values);
3606 if (current_tune->prefetch.l1_cache_line_size >= 0)
3607 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3608 current_tune->prefetch.l1_cache_line_size,
3609 global_options.x_param_values,
3610 global_options_set.x_param_values);
3611 if (current_tune->prefetch.l1_cache_size >= 0)
3612 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3613 current_tune->prefetch.l1_cache_size,
3614 global_options.x_param_values,
3615 global_options_set.x_param_values);
3616
3617 /* Use Neon rather than core registers to perform 64-bit
3618 operations. */
3619 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3620 if (use_neon_for_64bits == 1)
3621 prefer_neon_for_64bits = true;
3622
3623 /* Use the alternative scheduling-pressure algorithm by default. */
3624 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3625 global_options.x_param_values,
3626 global_options_set.x_param_values);
3627
3628 /* Look through ready list and all of queue for instructions
3629 relevant for L2 auto-prefetcher. */
3630 int param_sched_autopref_queue_depth;
3631
3632 switch (current_tune->sched_autopref)
3633 {
3634 case tune_params::SCHED_AUTOPREF_OFF:
3635 param_sched_autopref_queue_depth = -1;
3636 break;
3637
3638 case tune_params::SCHED_AUTOPREF_RANK:
3639 param_sched_autopref_queue_depth = 0;
3640 break;
3641
3642 case tune_params::SCHED_AUTOPREF_FULL:
3643 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3644 break;
3645
3646 default:
3647 gcc_unreachable ();
3648 }
3649
3650 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3651 param_sched_autopref_queue_depth,
3652 global_options.x_param_values,
3653 global_options_set.x_param_values);
3654
3655 /* Currently, for slow flash data, we just disable literal pools. We also
3656 disable it for pure-code. */
3657 if (target_slow_flash_data || target_pure_code)
3658 arm_disable_literal_pool = true;
3659
3660 if (use_cmse && !arm_arch_cmse)
3661 error ("target CPU does not support ARMv8-M Security Extensions");
3662
3663 /* Disable scheduling fusion by default unless the target is an armv7
3664 processor that prefers ldrd/strd. */
3665 if (flag_schedule_fusion == 2
3666 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3667 flag_schedule_fusion = 0;
3668
3669 /* Need to remember the initial options before they are overridden. */
3670 init_optimize = build_optimization_node (&global_options);
3671
3672 arm_option_override_internal (&global_options, &global_options_set);
3673 arm_option_check_internal (&global_options);
3674 arm_option_params_internal ();
3675
3676 /* Create the default target_options structure. */
3677 target_option_default_node = target_option_current_node
3678 = build_target_option_node (&global_options);
3679
3680 /* Register global variables with the garbage collector. */
3681 arm_add_gc_roots ();
3682
3683 /* Record the initial mode for testing. */
3684 thumb_flipper = TARGET_THUMB;
3685 }
3686
3687 static void
3688 arm_add_gc_roots (void)
3689 {
3690 gcc_obstack_init(&minipool_obstack);
3691 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3692 }
3693 \f
3694 /* A table of known ARM exception types.
3695 For use with the interrupt function attribute. */
3696
3697 typedef struct
3698 {
3699 const char *const arg;
3700 const unsigned long return_value;
3701 }
3702 isr_attribute_arg;
3703
3704 static const isr_attribute_arg isr_attribute_args [] =
3705 {
3706 { "IRQ", ARM_FT_ISR },
3707 { "irq", ARM_FT_ISR },
3708 { "FIQ", ARM_FT_FIQ },
3709 { "fiq", ARM_FT_FIQ },
3710 { "ABORT", ARM_FT_ISR },
3711 { "abort", ARM_FT_ISR },
3712 { "ABORT", ARM_FT_ISR },
3713 { "abort", ARM_FT_ISR },
3714 { "UNDEF", ARM_FT_EXCEPTION },
3715 { "undef", ARM_FT_EXCEPTION },
3716 { "SWI", ARM_FT_EXCEPTION },
3717 { "swi", ARM_FT_EXCEPTION },
3718 { NULL, ARM_FT_NORMAL }
3719 };
3720
3721 /* Returns the (interrupt) function type of the current
3722 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3723
3724 static unsigned long
3725 arm_isr_value (tree argument)
3726 {
3727 const isr_attribute_arg * ptr;
3728 const char * arg;
3729
3730 if (!arm_arch_notm)
3731 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3732
3733 /* No argument - default to IRQ. */
3734 if (argument == NULL_TREE)
3735 return ARM_FT_ISR;
3736
3737 /* Get the value of the argument. */
3738 if (TREE_VALUE (argument) == NULL_TREE
3739 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3740 return ARM_FT_UNKNOWN;
3741
3742 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3743
3744 /* Check it against the list of known arguments. */
3745 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3746 if (streq (arg, ptr->arg))
3747 return ptr->return_value;
3748
3749 /* An unrecognized interrupt type. */
3750 return ARM_FT_UNKNOWN;
3751 }
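
/* Usage sketch (illustrative only; the handler name is hypothetical):
   the strings in isr_attribute_args correspond to the argument of the
   "interrupt"/"isr" function attribute, e.g.

     void __attribute__ ((interrupt ("IRQ"))) my_irq_handler (void);

   arm_isr_value maps the "IRQ" string above to ARM_FT_ISR.  */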
3752
3753 /* Computes the type of the current function. */
3754
3755 static unsigned long
3756 arm_compute_func_type (void)
3757 {
3758 unsigned long type = ARM_FT_UNKNOWN;
3759 tree a;
3760 tree attr;
3761
3762 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3763
3764 /* Decide if the current function is volatile. Such functions
3765 never return, and many memory cycles can be saved by not storing
3766 register values that will never be needed again. This optimization
3767 was added to speed up context switching in a kernel application. */
3768 if (optimize > 0
3769 && (TREE_NOTHROW (current_function_decl)
3770 || !(flag_unwind_tables
3771 || (flag_exceptions
3772 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3773 && TREE_THIS_VOLATILE (current_function_decl))
3774 type |= ARM_FT_VOLATILE;
3775
3776 if (cfun->static_chain_decl != NULL)
3777 type |= ARM_FT_NESTED;
3778
3779 attr = DECL_ATTRIBUTES (current_function_decl);
3780
3781 a = lookup_attribute ("naked", attr);
3782 if (a != NULL_TREE)
3783 type |= ARM_FT_NAKED;
3784
3785 a = lookup_attribute ("isr", attr);
3786 if (a == NULL_TREE)
3787 a = lookup_attribute ("interrupt", attr);
3788
3789 if (a == NULL_TREE)
3790 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3791 else
3792 type |= arm_isr_value (TREE_VALUE (a));
3793
3794 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3795 type |= ARM_FT_CMSE_ENTRY;
3796
3797 return type;
3798 }
3799
3800 /* Returns the type of the current function. */
3801
3802 unsigned long
3803 arm_current_func_type (void)
3804 {
3805 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3806 cfun->machine->func_type = arm_compute_func_type ();
3807
3808 return cfun->machine->func_type;
3809 }
3810
3811 bool
3812 arm_allocate_stack_slots_for_args (void)
3813 {
3814 /* Naked functions should not allocate stack slots for arguments. */
3815 return !IS_NAKED (arm_current_func_type ());
3816 }
3817
3818 static bool
3819 arm_warn_func_return (tree decl)
3820 {
3821 /* Naked functions are implemented entirely in assembly, including the
3822 return sequence, so suppress warnings about this. */
3823 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3824 }
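
/* Illustrative example (assumed usage, not from the original comments):
   a "naked" function supplies its own prologue, epilogue and return
   sequence, typically in inline assembly, e.g.

     void __attribute__ ((naked)) my_naked_fn (void)
     {
       __asm__ volatile ("bx lr");
     }

   so arm_warn_func_return deliberately suppresses the missing-return
   warnings for such functions.  */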
3825
3826 \f
3827 /* Output assembler code for a block containing the constant parts
3828 of a trampoline, leaving space for the variable parts.
3829
3830 On the ARM, (if r8 is the static chain regnum, and remembering that
3831 referencing pc adds an offset of 8) the trampoline looks like:
3832 ldr r8, [pc, #0]
3833 ldr pc, [pc]
3834 .word static chain value
3835 .word function's address
3836 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3837
3838 static void
3839 arm_asm_trampoline_template (FILE *f)
3840 {
3841 fprintf (f, "\t.syntax unified\n");
3842
3843 if (TARGET_ARM)
3844 {
3845 fprintf (f, "\t.arm\n");
3846 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3847 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3848 }
3849 else if (TARGET_THUMB2)
3850 {
3851 fprintf (f, "\t.thumb\n");
3852 /* The Thumb-2 trampoline is similar to the arm implementation.
3853 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3854 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3855 STATIC_CHAIN_REGNUM, PC_REGNUM);
3856 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3857 }
3858 else
3859 {
3860 ASM_OUTPUT_ALIGN (f, 2);
3861 fprintf (f, "\t.code\t16\n");
3862 fprintf (f, ".Ltrampoline_start:\n");
3863 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3864 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3865 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3866 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3867 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3868 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3869 }
3870 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3871 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3872 }
3873
3874 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3875
3876 static void
3877 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3878 {
3879 rtx fnaddr, mem, a_tramp;
3880
3881 emit_block_move (m_tramp, assemble_trampoline_template (),
3882 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3883
3884 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3885 emit_move_insn (mem, chain_value);
3886
3887 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3888 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3889 emit_move_insn (mem, fnaddr);
3890
3891 a_tramp = XEXP (m_tramp, 0);
3892 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3893 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3894 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3895 }
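
/* Worked example (illustrative): for a 32-bit (ARM/Thumb-2) target the
   initialized trampoline built by the two routines above looks like

     offset 0:  ldr  <static chain reg>, [pc, #...]   ; from the template
     offset 4:  ldr  pc, [pc, #...]
     offset 8:  <static chain value>                   ; written by init
     offset 12: <address of the nested function>       ; written by init

   i.e. arm_trampoline_init patches the two data words that the template
   left as zeros.  */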
3896
3897 /* Thumb trampolines should be entered in thumb mode, so set
3898 the bottom bit of the address. */
3899
3900 static rtx
3901 arm_trampoline_adjust_address (rtx addr)
3902 {
3903 if (TARGET_THUMB)
3904 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3905 NULL, 0, OPTAB_LIB_WIDEN);
3906 return addr;
3907 }
3908 \f
3909 /* Return 1 if it is possible to return using a single instruction.
3910 If SIBLING is non-null, this is a test for a return before a sibling
3911 call. SIBLING is the call insn, so we can examine its register usage. */
3912
3913 int
3914 use_return_insn (int iscond, rtx sibling)
3915 {
3916 int regno;
3917 unsigned int func_type;
3918 unsigned long saved_int_regs;
3919 unsigned HOST_WIDE_INT stack_adjust;
3920 arm_stack_offsets *offsets;
3921
3922 /* Never use a return instruction before reload has run. */
3923 if (!reload_completed)
3924 return 0;
3925
3926 func_type = arm_current_func_type ();
3927
3928 /* Naked, volatile and stack alignment functions need special
3929 consideration. */
3930 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3931 return 0;
3932
3933 /* So do interrupt functions that use the frame pointer and Thumb
3934 interrupt functions. */
3935 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3936 return 0;
3937
3938 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3939 && !optimize_function_for_size_p (cfun))
3940 return 0;
3941
3942 offsets = arm_get_frame_offsets ();
3943 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3944
3945 /* As do variadic functions. */
3946 if (crtl->args.pretend_args_size
3947 || cfun->machine->uses_anonymous_args
3948 /* Or if the function calls __builtin_eh_return () */
3949 || crtl->calls_eh_return
3950 /* Or if the function calls alloca */
3951 || cfun->calls_alloca
3952 /* Or if there is a stack adjustment. However, if the stack pointer
3953 is saved on the stack, we can use a pre-incrementing stack load. */
3954 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3955 && stack_adjust == 4))
3956 /* Or if the static chain register was saved above the frame, under the
3957 assumption that the stack pointer isn't saved on the stack. */
3958 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3959 && arm_compute_static_chain_stack_bytes() != 0))
3960 return 0;
3961
3962 saved_int_regs = offsets->saved_regs_mask;
3963
3964 /* Unfortunately, the insn
3965
3966 ldmib sp, {..., sp, ...}
3967
3968 triggers a bug on most SA-110 based devices, such that the stack
3969 pointer won't be correctly restored if the instruction takes a
3970 page fault. We work around this problem by popping r3 along with
3971 the other registers, since that is never slower than executing
3972 another instruction.
3973
3974 We test for !arm_arch5 here, because code for any architecture
3975 less than this could potentially be run on one of the buggy
3976 chips. */
3977 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3978 {
3979 /* Validate that r3 is a call-clobbered register (always true in
3980 the default abi) ... */
3981 if (!call_used_regs[3])
3982 return 0;
3983
3984 /* ... that it isn't being used for a return value ... */
3985 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3986 return 0;
3987
3988 /* ... or for a tail-call argument ... */
3989 if (sibling)
3990 {
3991 gcc_assert (CALL_P (sibling));
3992
3993 if (find_regno_fusage (sibling, USE, 3))
3994 return 0;
3995 }
3996
3997 /* ... and that there are no call-saved registers in r0-r2
3998 (always true in the default ABI). */
3999 if (saved_int_regs & 0x7)
4000 return 0;
4001 }
4002
4003 /* Can't be done if interworking with Thumb, and any registers have been
4004 stacked. */
4005 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4006 return 0;
4007
4008 /* On StrongARM, conditional returns are expensive if they aren't
4009 taken and multiple registers have been stacked. */
4010 if (iscond && arm_tune_strongarm)
4011 {
4012 /* Conditional return when just the LR is stored is a simple
4013 conditional-load instruction, that's not expensive. */
4014 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4015 return 0;
4016
4017 if (flag_pic
4018 && arm_pic_register != INVALID_REGNUM
4019 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4020 return 0;
4021 }
4022
4023 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4024 several instructions if anything needs to be popped. */
4025 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4026 return 0;
4027
4028 /* If there are saved registers but the LR isn't saved, then we need
4029 two instructions for the return. */
4030 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4031 return 0;
4032
4033 /* Can't be done if any of the VFP regs are pushed,
4034 since this also requires an insn. */
4035 if (TARGET_HARD_FLOAT)
4036 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4037 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4038 return 0;
4039
4040 if (TARGET_REALLY_IWMMXT)
4041 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4042 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4043 return 0;
4044
4045 return 1;
4046 }
4047
4048 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4049 shrink-wrapping if possible. This is the case if we need to emit a
4050 prologue, which we can test by looking at the offsets. */
4051 bool
4052 use_simple_return_p (void)
4053 {
4054 arm_stack_offsets *offsets;
4055
4056 /* Note this function can be called before or after reload. */
4057 if (!reload_completed)
4058 arm_compute_frame_layout ();
4059
4060 offsets = arm_get_frame_offsets ();
4061 return offsets->outgoing_args != 0;
4062 }
4063
4064 /* Return TRUE if int I is a valid immediate ARM constant. */
4065
4066 int
4067 const_ok_for_arm (HOST_WIDE_INT i)
4068 {
4069 int lowbit;
4070
4071 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4072 be all zero, or all one. */
4073 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4074 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4075 != ((~(unsigned HOST_WIDE_INT) 0)
4076 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4077 return FALSE;
4078
4079 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4080
4081 /* Fast return for 0 and small values. We must do this for zero, since
4082 the code below can't handle that one case. */
4083 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4084 return TRUE;
4085
4086 /* Get the number of trailing zeros. */
4087 lowbit = ffs((int) i) - 1;
4088
4089 /* Only even shifts are allowed in ARM mode so round down to the
4090 nearest even number. */
4091 if (TARGET_ARM)
4092 lowbit &= ~1;
4093
4094 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4095 return TRUE;
4096
4097 if (TARGET_ARM)
4098 {
4099 /* Allow rotated constants in ARM mode. */
4100 if (lowbit <= 4
4101 && ((i & ~0xc000003f) == 0
4102 || (i & ~0xf000000f) == 0
4103 || (i & ~0xfc000003) == 0))
4104 return TRUE;
4105 }
4106 else if (TARGET_THUMB2)
4107 {
4108 HOST_WIDE_INT v;
4109
4110 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4111 v = i & 0xff;
4112 v |= v << 16;
4113 if (i == v || i == (v | (v << 8)))
4114 return TRUE;
4115
4116 /* Allow repeated pattern 0xXY00XY00. */
4117 v = i & 0xff00;
4118 v |= v << 16;
4119 if (i == v)
4120 return TRUE;
4121 }
4122 else if (TARGET_HAVE_MOVT)
4123 {
4124 /* Thumb-1 Targets with MOVT. */
4125 if (i > 0xffff)
4126 return FALSE;
4127 else
4128 return TRUE;
4129 }
4130
4131 return FALSE;
4132 }
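
/* Illustrative examples (not exhaustive) of the rules encoded above:

     0x000000ff  valid everywhere (plain 8-bit value).
     0x0003fc00  valid in ARM mode: 0xff rotated by an even amount.
     0x000001fe  not a valid ARM-mode immediate (0xff shifted by an odd
                 amount), although Thumb-2 can encode it.
     0x00ff00ff  not an ARM-mode immediate, but matches the Thumb-2
                 replicated pattern 0x00XY00XY handled above.  */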
4133
4134 /* Return true if I is a valid constant for the operation CODE. */
4135 int
4136 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4137 {
4138 if (const_ok_for_arm (i))
4139 return 1;
4140
4141 switch (code)
4142 {
4143 case SET:
4144 /* See if we can use movw. */
4145 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4146 return 1;
4147 else
4148 /* Otherwise, try mvn. */
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4150
4151 case PLUS:
4152 /* See if we can use addw or subw. */
4153 if (TARGET_THUMB2
4154 && ((i & 0xfffff000) == 0
4155 || ((-i) & 0xfffff000) == 0))
4156 return 1;
4157 /* Fall through. */
4158 case COMPARE:
4159 case EQ:
4160 case NE:
4161 case GT:
4162 case LE:
4163 case LT:
4164 case GE:
4165 case GEU:
4166 case LTU:
4167 case GTU:
4168 case LEU:
4169 case UNORDERED:
4170 case ORDERED:
4171 case UNEQ:
4172 case UNGE:
4173 case UNLT:
4174 case UNGT:
4175 case UNLE:
4176 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4177
4178 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4179 case XOR:
4180 return 0;
4181
4182 case IOR:
4183 if (TARGET_THUMB2)
4184 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4185 return 0;
4186
4187 case AND:
4188 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4189
4190 default:
4191 gcc_unreachable ();
4192 }
4193 }
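
/* Illustrative examples (inferred from the logic above): for SET,
   0xffffff00 is accepted because its bitwise inverse 0xff is a valid
   immediate, so it can be loaded with mvn.  For PLUS, 0xffffff00
   (i.e. -256) is accepted because its negation 0x100 is valid, so the
   addition can be emitted as a subtract of 256.  */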
4194
4195 /* Return true if I is a valid di mode constant for the operation CODE. */
4196 int
4197 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4198 {
4199 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4200 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4201 rtx hi = GEN_INT (hi_val);
4202 rtx lo = GEN_INT (lo_val);
4203
4204 if (TARGET_THUMB1)
4205 return 0;
4206
4207 switch (code)
4208 {
4209 case AND:
4210 case IOR:
4211 case XOR:
4212 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4213 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4214 case PLUS:
4215 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4216
4217 default:
4218 return 0;
4219 }
4220 }
4221
4222 /* Emit a sequence of insns to handle a large constant.
4223 CODE is the code of the operation required, it can be any of SET, PLUS,
4224 IOR, AND, XOR, MINUS;
4225 MODE is the mode in which the operation is being performed;
4226 VAL is the integer to operate on;
4227 SOURCE is the other operand (a register, or a null-pointer for SET);
4228 SUBTARGETS means it is safe to create scratch registers if that will
4229 either produce a simpler sequence, or we will want to cse the values.
4230 Return value is the number of insns emitted. */
4231
4232 /* ??? Tweak this for thumb2. */
4233 int
4234 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4235 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4236 {
4237 rtx cond;
4238
4239 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4240 cond = COND_EXEC_TEST (PATTERN (insn));
4241 else
4242 cond = NULL_RTX;
4243
4244 if (subtargets || code == SET
4245 || (REG_P (target) && REG_P (source)
4246 && REGNO (target) != REGNO (source)))
4247 {
4248 /* After arm_reorg has been called, we can't fix up expensive
4249 constants by pushing them into memory so we must synthesize
4250 them in-line, regardless of the cost. This is only likely to
4251 be more costly on chips that have load delay slots and we are
4252 compiling without running the scheduler (so no splitting
4253 occurred before the final instruction emission).
4254
4255 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4256 */
4257 if (!cfun->machine->after_arm_reorg
4258 && !cond
4259 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4260 1, 0)
4261 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4262 + (code != SET))))
4263 {
4264 if (code == SET)
4265 {
4266 /* Currently SET is the only monadic value for CODE; all
4267 the rest are dyadic. */
4268 if (TARGET_USE_MOVT)
4269 arm_emit_movpair (target, GEN_INT (val));
4270 else
4271 emit_set_insn (target, GEN_INT (val));
4272
4273 return 1;
4274 }
4275 else
4276 {
4277 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4278
4279 if (TARGET_USE_MOVT)
4280 arm_emit_movpair (temp, GEN_INT (val));
4281 else
4282 emit_set_insn (temp, GEN_INT (val));
4283
4284 /* For MINUS, the source is subtracted from the constant, since
4285 we never have subtraction of a constant. */
4286 if (code == MINUS)
4287 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4288 else
4289 emit_set_insn (target,
4290 gen_rtx_fmt_ee (code, mode, source, temp));
4291 return 2;
4292 }
4293 }
4294 }
4295
4296 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4297 1);
4298 }
4299
4300 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4301 ARM/THUMB2 immediates, and add up to VAL.
4302 The function return value gives the number of insns required. */
4303 static int
4304 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4305 struct four_ints *return_sequence)
4306 {
4307 int best_consecutive_zeros = 0;
4308 int i;
4309 int best_start = 0;
4310 int insns1, insns2;
4311 struct four_ints tmp_sequence;
4312
4313 /* If we aren't targeting ARM, the best place to start is always at
4314 the bottom, otherwise look more closely. */
4315 if (TARGET_ARM)
4316 {
4317 for (i = 0; i < 32; i += 2)
4318 {
4319 int consecutive_zeros = 0;
4320
4321 if (!(val & (3 << i)))
4322 {
4323 while ((i < 32) && !(val & (3 << i)))
4324 {
4325 consecutive_zeros += 2;
4326 i += 2;
4327 }
4328 if (consecutive_zeros > best_consecutive_zeros)
4329 {
4330 best_consecutive_zeros = consecutive_zeros;
4331 best_start = i - consecutive_zeros;
4332 }
4333 i -= 2;
4334 }
4335 }
4336 }
4337
4338 /* So long as it won't require any more insns to do so, it's
4339 desirable to emit a small constant (in bits 0...9) in the last
4340 insn. This way there is more chance that it can be combined with
4341 a later addressing insn to form a pre-indexed load or store
4342 operation. Consider:
4343
4344 *((volatile int *)0xe0000100) = 1;
4345 *((volatile int *)0xe0000110) = 2;
4346
4347 We want this to wind up as:
4348
4349 mov rA, #0xe0000000
4350 mov rB, #1
4351 str rB, [rA, #0x100]
4352 mov rB, #2
4353 str rB, [rA, #0x110]
4354
4355 rather than having to synthesize both large constants from scratch.
4356
4357 Therefore, we calculate how many insns would be required to emit
4358 the constant starting from `best_start', and also starting from
4359 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4360 yield a shorter sequence, we may as well use zero. */
4361 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4362 if (best_start != 0
4363 && ((HOST_WIDE_INT_1U << best_start) < val))
4364 {
4365 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4366 if (insns2 <= insns1)
4367 {
4368 *return_sequence = tmp_sequence;
4369 insns1 = insns2;
4370 }
4371 }
4372
4373 return insns1;
4374 }
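
/* Worked example (illustrative): in ARM mode, without MOVT, a constant
   such as 0x12345678 can be covered by four 8-bit rotated chunks, e.g.
     0x12000000, 0x00340000, 0x00005600, 0x00000078
   so a SET would be synthesized roughly as
     mov   rD, #0x12000000
     orr   rD, rD, #0x340000
     orr   rD, rD, #0x5600
     orr   rD, rD, #0x78
   (four insns; when MOVT is available, the movw/movt path in
   arm_split_constant is used instead).  */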
4375
4376 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4377 static int
4378 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4379 struct four_ints *return_sequence, int i)
4380 {
4381 int remainder = val & 0xffffffff;
4382 int insns = 0;
4383
4384 /* Try and find a way of doing the job in either two or three
4385 instructions.
4386
4387 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4388 location. We start at position I. This may be the MSB, or
4389 optimal_immediate_sequence may have positioned it at the largest block
4390 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4391 wrapping around to the top of the word when we drop off the bottom.
4392 In the worst case this code should produce no more than four insns.
4393
4394 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4395 constants, shifted to any arbitrary location. We should always start
4396 at the MSB. */
4397 do
4398 {
4399 int end;
4400 unsigned int b1, b2, b3, b4;
4401 unsigned HOST_WIDE_INT result;
4402 int loc;
4403
4404 gcc_assert (insns < 4);
4405
4406 if (i <= 0)
4407 i += 32;
4408
4409 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4410 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4411 {
4412 loc = i;
4413 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4414 /* We can use addw/subw for the last 12 bits. */
4415 result = remainder;
4416 else
4417 {
4418 /* Use an 8-bit shifted/rotated immediate. */
4419 end = i - 8;
4420 if (end < 0)
4421 end += 32;
4422 result = remainder & ((0x0ff << end)
4423 | ((i < end) ? (0xff >> (32 - end))
4424 : 0));
4425 i -= 8;
4426 }
4427 }
4428 else
4429 {
4430 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4431 arbitrary shifts. */
4432 i -= TARGET_ARM ? 2 : 1;
4433 continue;
4434 }
4435
4436 /* Next, see if we can do a better job with a thumb2 replicated
4437 constant.
4438
4439 We do it this way around to catch the cases like 0x01F001E0 where
4440 two 8-bit immediates would work, but a replicated constant would
4441 make it worse.
4442
4443 TODO: 16-bit constants that don't clear all the bits, but still win.
4444 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4445 if (TARGET_THUMB2)
4446 {
4447 b1 = (remainder & 0xff000000) >> 24;
4448 b2 = (remainder & 0x00ff0000) >> 16;
4449 b3 = (remainder & 0x0000ff00) >> 8;
4450 b4 = remainder & 0xff;
4451
4452 if (loc > 24)
4453 {
4454 /* The 8-bit immediate already found clears b1 (and maybe b2),
4455 but must leave b3 and b4 alone. */
4456
4457 /* First try to find a 32-bit replicated constant that clears
4458 almost everything. We can assume that we can't do it in one,
4459 or else we wouldn't be here. */
4460 unsigned int tmp = b1 & b2 & b3 & b4;
4461 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4462 + (tmp << 24);
4463 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4464 + (tmp == b3) + (tmp == b4);
4465 if (tmp
4466 && (matching_bytes >= 3
4467 || (matching_bytes == 2
4468 && const_ok_for_op (remainder & ~tmp2, code))))
4469 {
4470 /* At least 3 of the bytes match, and the fourth has at
4471 least as many bits set, or two of the bytes match
4472 and it will only require one more insn to finish. */
4473 result = tmp2;
4474 i = tmp != b1 ? 32
4475 : tmp != b2 ? 24
4476 : tmp != b3 ? 16
4477 : 8;
4478 }
4479
4480 /* Second, try to find a 16-bit replicated constant that can
4481 leave three of the bytes clear. If b2 or b4 is already
4482 zero, then we can. If the 8-bit from above would not
4483 clear b2 anyway, then we still win. */
4484 else if (b1 == b3 && (!b2 || !b4
4485 || (remainder & 0x00ff0000 & ~result)))
4486 {
4487 result = remainder & 0xff00ff00;
4488 i = 24;
4489 }
4490 }
4491 else if (loc > 16)
4492 {
4493 /* The 8-bit immediate already found clears b2 (and maybe b3)
4494 and we don't get here unless b1 is already clear, but it will
4495 leave b4 unchanged. */
4496
4497 /* If we can clear b2 and b4 at once, then we win, since the
4498 8-bits couldn't possibly reach that far. */
4499 if (b2 == b4)
4500 {
4501 result = remainder & 0x00ff00ff;
4502 i = 16;
4503 }
4504 }
4505 }
4506
4507 return_sequence->i[insns++] = result;
4508 remainder &= ~result;
4509
4510 if (code == SET || code == MINUS)
4511 code = PLUS;
4512 }
4513 while (remainder);
4514
4515 return insns;
4516 }
4517
4518 /* Emit an instruction with the indicated PATTERN. If COND is
4519 non-NULL, conditionalize the execution of the instruction on COND
4520 being true. */
4521
4522 static void
4523 emit_constant_insn (rtx cond, rtx pattern)
4524 {
4525 if (cond)
4526 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4527 emit_insn (pattern);
4528 }
4529
4530 /* As above, but extra parameter GENERATE which, if clear, suppresses
4531 RTL generation. */
4532
4533 static int
4534 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4535 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4536 int subtargets, int generate)
4537 {
4538 int can_invert = 0;
4539 int can_negate = 0;
4540 int final_invert = 0;
4541 int i;
4542 int set_sign_bit_copies = 0;
4543 int clear_sign_bit_copies = 0;
4544 int clear_zero_bit_copies = 0;
4545 int set_zero_bit_copies = 0;
4546 int insns = 0, neg_insns, inv_insns;
4547 unsigned HOST_WIDE_INT temp1, temp2;
4548 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4549 struct four_ints *immediates;
4550 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4551
4552 /* Find out which operations are safe for a given CODE. Also do a quick
4553 check for degenerate cases; these can occur when DImode operations
4554 are split. */
4555 switch (code)
4556 {
4557 case SET:
4558 can_invert = 1;
4559 break;
4560
4561 case PLUS:
4562 can_negate = 1;
4563 break;
4564
4565 case IOR:
4566 if (remainder == 0xffffffff)
4567 {
4568 if (generate)
4569 emit_constant_insn (cond,
4570 gen_rtx_SET (target,
4571 GEN_INT (ARM_SIGN_EXTEND (val))));
4572 return 1;
4573 }
4574
4575 if (remainder == 0)
4576 {
4577 if (reload_completed && rtx_equal_p (target, source))
4578 return 0;
4579
4580 if (generate)
4581 emit_constant_insn (cond, gen_rtx_SET (target, source));
4582 return 1;
4583 }
4584 break;
4585
4586 case AND:
4587 if (remainder == 0)
4588 {
4589 if (generate)
4590 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4591 return 1;
4592 }
4593 if (remainder == 0xffffffff)
4594 {
4595 if (reload_completed && rtx_equal_p (target, source))
4596 return 0;
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4600 }
4601 can_invert = 1;
4602 break;
4603
4604 case XOR:
4605 if (remainder == 0)
4606 {
4607 if (reload_completed && rtx_equal_p (target, source))
4608 return 0;
4609 if (generate)
4610 emit_constant_insn (cond, gen_rtx_SET (target, source));
4611 return 1;
4612 }
4613
4614 if (remainder == 0xffffffff)
4615 {
4616 if (generate)
4617 emit_constant_insn (cond,
4618 gen_rtx_SET (target,
4619 gen_rtx_NOT (mode, source)));
4620 return 1;
4621 }
4622 final_invert = 1;
4623 break;
4624
4625 case MINUS:
4626 /* We treat MINUS as (val - source), since (source - val) is always
4627 passed as (source + (-val)). */
4628 if (remainder == 0)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NEG (mode, source)));
4634 return 1;
4635 }
4636 if (const_ok_for_arm (val))
4637 {
4638 if (generate)
4639 emit_constant_insn (cond,
4640 gen_rtx_SET (target,
4641 gen_rtx_MINUS (mode, GEN_INT (val),
4642 source)));
4643 return 1;
4644 }
4645
4646 break;
4647
4648 default:
4649 gcc_unreachable ();
4650 }
4651
4652 /* If we can do it in one insn get out quickly. */
4653 if (const_ok_for_op (val, code))
4654 {
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 (source
4659 ? gen_rtx_fmt_ee (code, mode, source,
4660 GEN_INT (val))
4661 : GEN_INT (val))));
4662 return 1;
4663 }
4664
4665 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4666 insn. */
4667 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4668 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4669 {
4670 if (generate)
4671 {
4672 if (mode == SImode && i == 16)
4673 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4674 smaller insn. */
4675 emit_constant_insn (cond,
4676 gen_zero_extendhisi2
4677 (target, gen_lowpart (HImode, source)));
4678 else
4679 /* Extz only supports SImode, but we can coerce the operands
4680 into that mode. */
4681 emit_constant_insn (cond,
4682 gen_extzv_t2 (gen_lowpart (SImode, target),
4683 gen_lowpart (SImode, source),
4684 GEN_INT (i), const0_rtx));
4685 }
4686
4687 return 1;
4688 }
4689
4690 /* Calculate a few attributes that may be useful for specific
4691 optimizations. */
4692 /* Count number of leading zeros. */
4693 for (i = 31; i >= 0; i--)
4694 {
4695 if ((remainder & (1 << i)) == 0)
4696 clear_sign_bit_copies++;
4697 else
4698 break;
4699 }
4700
4701 /* Count number of leading 1's. */
4702 for (i = 31; i >= 0; i--)
4703 {
4704 if ((remainder & (1 << i)) != 0)
4705 set_sign_bit_copies++;
4706 else
4707 break;
4708 }
4709
4710 /* Count number of trailing zeros. */
4711 for (i = 0; i <= 31; i++)
4712 {
4713 if ((remainder & (1 << i)) == 0)
4714 clear_zero_bit_copies++;
4715 else
4716 break;
4717 }
4718
4719 /* Count number of trailing 1's. */
4720 for (i = 0; i <= 31; i++)
4721 {
4722 if ((remainder & (1 << i)) != 0)
4723 set_zero_bit_copies++;
4724 else
4725 break;
4726 }
4727
4728 switch (code)
4729 {
4730 case SET:
4731 /* See if we can do this by sign_extending a constant that is known
4732 to be negative. This is a good way of doing it, since the shift
4733 may well merge into a subsequent insn. */
4734 if (set_sign_bit_copies > 1)
4735 {
4736 if (const_ok_for_arm
4737 (temp1 = ARM_SIGN_EXTEND (remainder
4738 << (set_sign_bit_copies - 1))))
4739 {
4740 if (generate)
4741 {
4742 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4743 emit_constant_insn (cond,
4744 gen_rtx_SET (new_src, GEN_INT (temp1)));
4745 emit_constant_insn (cond,
4746 gen_ashrsi3 (target, new_src,
4747 GEN_INT (set_sign_bit_copies - 1)));
4748 }
4749 return 2;
4750 }
4751 /* For an inverted constant, we will need to set the low bits,
4752 these will be shifted out of harm's way. */
4753 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4754 if (const_ok_for_arm (~temp1))
4755 {
4756 if (generate)
4757 {
4758 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4759 emit_constant_insn (cond,
4760 gen_rtx_SET (new_src, GEN_INT (temp1)));
4761 emit_constant_insn (cond,
4762 gen_ashrsi3 (target, new_src,
4763 GEN_INT (set_sign_bit_copies - 1)));
4764 }
4765 return 2;
4766 }
4767 }
4768
4769 /* See if we can calculate the value as the difference between two
4770 valid immediates. */
4771 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4772 {
4773 int topshift = clear_sign_bit_copies & ~1;
4774
4775 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4776 & (0xff000000 >> topshift));
4777
4778 /* If temp1 is zero, then that means the 9 most significant
4779 bits of remainder were 1 and we've caused it to overflow.
4780 When topshift is 0 we don't need to do anything since we
4781 can borrow from 'bit 32'. */
4782 if (temp1 == 0 && topshift != 0)
4783 temp1 = 0x80000000 >> (topshift - 1);
4784
4785 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4786
4787 if (const_ok_for_arm (temp2))
4788 {
4789 if (generate)
4790 {
4791 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4792 emit_constant_insn (cond,
4793 gen_rtx_SET (new_src, GEN_INT (temp1)));
4794 emit_constant_insn (cond,
4795 gen_addsi3 (target, new_src,
4796 GEN_INT (-temp2)));
4797 }
4798
4799 return 2;
4800 }
4801 }
4802
4803 /* See if we can generate this by setting the bottom (or the top)
4804 16 bits, and then shifting these into the other half of the
4805 word. We only look for the simplest cases, to do more would cost
4806 too much. Be careful, however, not to generate this when the
4807 alternative would take fewer insns. */
4808 if (val & 0xffff0000)
4809 {
4810 temp1 = remainder & 0xffff0000;
4811 temp2 = remainder & 0x0000ffff;
4812
4813 /* Overlaps outside this range are best done using other methods. */
4814 for (i = 9; i < 24; i++)
4815 {
4816 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4817 && !const_ok_for_arm (temp2))
4818 {
4819 rtx new_src = (subtargets
4820 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4821 : target);
4822 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4823 source, subtargets, generate);
4824 source = new_src;
4825 if (generate)
4826 emit_constant_insn
4827 (cond,
4828 gen_rtx_SET
4829 (target,
4830 gen_rtx_IOR (mode,
4831 gen_rtx_ASHIFT (mode, source,
4832 GEN_INT (i)),
4833 source)));
4834 return insns + 1;
4835 }
4836 }
4837
4838 /* Don't duplicate cases already considered. */
4839 for (i = 17; i < 24; i++)
4840 {
4841 if (((temp1 | (temp1 >> i)) == remainder)
4842 && !const_ok_for_arm (temp1))
4843 {
4844 rtx new_src = (subtargets
4845 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4846 : target);
4847 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4848 source, subtargets, generate);
4849 source = new_src;
4850 if (generate)
4851 emit_constant_insn
4852 (cond,
4853 gen_rtx_SET (target,
4854 gen_rtx_IOR
4855 (mode,
4856 gen_rtx_LSHIFTRT (mode, source,
4857 GEN_INT (i)),
4858 source)));
4859 return insns + 1;
4860 }
4861 }
4862 }
4863 break;
4864
4865 case IOR:
4866 case XOR:
4867 /* If we have IOR or XOR, and the constant can be loaded in a
4868 single instruction, and we can find a temporary to put it in,
4869 then this can be done in two instructions instead of 3-4. */
4870 if (subtargets
4871 /* TARGET can't be NULL if SUBTARGETS is 0 */
4872 || (reload_completed && !reg_mentioned_p (target, source)))
4873 {
4874 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4875 {
4876 if (generate)
4877 {
4878 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4879
4880 emit_constant_insn (cond,
4881 gen_rtx_SET (sub, GEN_INT (val)));
4882 emit_constant_insn (cond,
4883 gen_rtx_SET (target,
4884 gen_rtx_fmt_ee (code, mode,
4885 source, sub)));
4886 }
4887 return 2;
4888 }
4889 }
4890
4891 if (code == XOR)
4892 break;
4893
4894 /* Convert.
4895 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4896 and the remaining bits 0, e.g. 0xfff00000)
4897 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4898
4899 This can be done in 2 instructions by using shifts with mov or mvn.
4900 e.g. for
4901 x = x | 0xfff00000;
4902 we generate.
4903 mvn r0, r0, asl #12
4904 mvn r0, r0, lsr #12 */
4905 if (set_sign_bit_copies > 8
4906 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4907 {
4908 if (generate)
4909 {
4910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4911 rtx shift = GEN_INT (set_sign_bit_copies);
4912
4913 emit_constant_insn
4914 (cond,
4915 gen_rtx_SET (sub,
4916 gen_rtx_NOT (mode,
4917 gen_rtx_ASHIFT (mode,
4918 source,
4919 shift))));
4920 emit_constant_insn
4921 (cond,
4922 gen_rtx_SET (target,
4923 gen_rtx_NOT (mode,
4924 gen_rtx_LSHIFTRT (mode, sub,
4925 shift))));
4926 }
4927 return 2;
4928 }
4929
4930 /* Convert
4931 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4932 to
4933 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4934
4935 E.g. for r0 = r0 | 0xfff
4936 mvn r0, r0, lsr #12
4937 mvn r0, r0, asl #12
4938
4939 */
4940 if (set_zero_bit_copies > 8
4941 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4942 {
4943 if (generate)
4944 {
4945 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4946 rtx shift = GEN_INT (set_zero_bit_copies);
4947
4948 emit_constant_insn
4949 (cond,
4950 gen_rtx_SET (sub,
4951 gen_rtx_NOT (mode,
4952 gen_rtx_LSHIFTRT (mode,
4953 source,
4954 shift))));
4955 emit_constant_insn
4956 (cond,
4957 gen_rtx_SET (target,
4958 gen_rtx_NOT (mode,
4959 gen_rtx_ASHIFT (mode, sub,
4960 shift))));
4961 }
4962 return 2;
4963 }
4964
4965 /* This will never be reached for Thumb2 because orn is a valid
4966 instruction. This is for Thumb1 and the ARM 32 bit cases.
4967
4968 x = y | constant (such that ~constant is a valid constant)
4969 Transform this to
4970 x = ~(~y & ~constant).
4971 */
4972 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4973 {
4974 if (generate)
4975 {
4976 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4977 emit_constant_insn (cond,
4978 gen_rtx_SET (sub,
4979 gen_rtx_NOT (mode, source)));
4980 source = sub;
4981 if (subtargets)
4982 sub = gen_reg_rtx (mode);
4983 emit_constant_insn (cond,
4984 gen_rtx_SET (sub,
4985 gen_rtx_AND (mode, source,
4986 GEN_INT (temp1))));
4987 emit_constant_insn (cond,
4988 gen_rtx_SET (target,
4989 gen_rtx_NOT (mode, sub)));
4990 }
4991 return 3;
4992 }
4993 break;
4994
4995 case AND:
4996 /* See if two shifts will do 2 or more insn's worth of work. */
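 /* Illustrative example (inferred from the code below): when the
    single-insn UXTH/UBFX case above does not apply, AND with 0x00007fff
    (17 leading zero bits) can be done with two shifts instead of
    synthesizing the mask:
      lsl rT, rS, #17
      lsr rD, rT, #17
    which clears the top 17 bits in two insns.  */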
4997 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4998 {
4999 HOST_WIDE_INT shift_mask = ((0xffffffff
5000 << (32 - clear_sign_bit_copies))
5001 & 0xffffffff);
5002
5003 if ((remainder | shift_mask) != 0xffffffff)
5004 {
5005 HOST_WIDE_INT new_val
5006 = ARM_SIGN_EXTEND (remainder | shift_mask);
5007
5008 if (generate)
5009 {
5010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5011 insns = arm_gen_constant (AND, SImode, cond, new_val,
5012 new_src, source, subtargets, 1);
5013 source = new_src;
5014 }
5015 else
5016 {
5017 rtx targ = subtargets ? NULL_RTX : target;
5018 insns = arm_gen_constant (AND, mode, cond, new_val,
5019 targ, source, subtargets, 0);
5020 }
5021 }
5022
5023 if (generate)
5024 {
5025 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5026 rtx shift = GEN_INT (clear_sign_bit_copies);
5027
5028 emit_insn (gen_ashlsi3 (new_src, source, shift));
5029 emit_insn (gen_lshrsi3 (target, new_src, shift));
5030 }
5031
5032 return insns + 2;
5033 }
5034
5035 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5036 {
5037 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5038
5039 if ((remainder | shift_mask) != 0xffffffff)
5040 {
5041 HOST_WIDE_INT new_val
5042 = ARM_SIGN_EXTEND (remainder | shift_mask);
5043 if (generate)
5044 {
5045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5046
5047 insns = arm_gen_constant (AND, mode, cond, new_val,
5048 new_src, source, subtargets, 1);
5049 source = new_src;
5050 }
5051 else
5052 {
5053 rtx targ = subtargets ? NULL_RTX : target;
5054
5055 insns = arm_gen_constant (AND, mode, cond, new_val,
5056 targ, source, subtargets, 0);
5057 }
5058 }
5059
5060 if (generate)
5061 {
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5063 rtx shift = GEN_INT (clear_zero_bit_copies);
5064
5065 emit_insn (gen_lshrsi3 (new_src, source, shift));
5066 emit_insn (gen_ashlsi3 (target, new_src, shift));
5067 }
5068
5069 return insns + 2;
5070 }
5071
5072 break;
5073
5074 default:
5075 break;
5076 }
5077
5078 /* Calculate what the instruction sequences would be if we generated it
5079 normally, negated, or inverted. */
5080 if (code == AND)
5081 /* AND cannot be split into multiple insns, so invert and use BIC. */
5082 insns = 99;
5083 else
5084 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5085
5086 if (can_negate)
5087 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5088 &neg_immediates);
5089 else
5090 neg_insns = 99;
5091
5092 if (can_invert || final_invert)
5093 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5094 &inv_immediates);
5095 else
5096 inv_insns = 99;
5097
5098 immediates = &pos_immediates;
5099
5100 /* Is the negated immediate sequence more efficient? */
5101 if (neg_insns < insns && neg_insns <= inv_insns)
5102 {
5103 insns = neg_insns;
5104 immediates = &neg_immediates;
5105 }
5106 else
5107 can_negate = 0;
5108
5109 /* Is the inverted immediate sequence more efficient?
5110 We must allow for an extra NOT instruction for XOR operations, although
5111 there is some chance that the final 'mvn' will get optimized later. */
5112 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5113 {
5114 insns = inv_insns;
5115 immediates = &inv_immediates;
5116 }
5117 else
5118 {
5119 can_invert = 0;
5120 final_invert = 0;
5121 }
5122
5123 /* Now output the chosen sequence as instructions. */
5124 if (generate)
5125 {
5126 for (i = 0; i < insns; i++)
5127 {
5128 rtx new_src, temp1_rtx;
5129
5130 temp1 = immediates->i[i];
5131
5132 if (code == SET || code == MINUS)
5133 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5134 else if ((final_invert || i < (insns - 1)) && subtargets)
5135 new_src = gen_reg_rtx (mode);
5136 else
5137 new_src = target;
5138
5139 if (can_invert)
5140 temp1 = ~temp1;
5141 else if (can_negate)
5142 temp1 = -temp1;
5143
5144 temp1 = trunc_int_for_mode (temp1, mode);
5145 temp1_rtx = GEN_INT (temp1);
5146
5147 if (code == SET)
5148 ;
5149 else if (code == MINUS)
5150 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5151 else
5152 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5153
5154 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5155 source = new_src;
5156
5157 if (code == SET)
5158 {
5159 can_negate = can_invert;
5160 can_invert = 0;
5161 code = PLUS;
5162 }
5163 else if (code == MINUS)
5164 code = PLUS;
5165 }
5166 }
5167
5168 if (final_invert)
5169 {
5170 if (generate)
5171 emit_constant_insn (cond, gen_rtx_SET (target,
5172 gen_rtx_NOT (mode, source)));
5173 insns++;
5174 }
5175
5176 return insns;
5177 }
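/* Editorial illustration, not part of the original source: two sketches of
   how the positive / negated / inverted choice above typically resolves,
   assuming standard ARM immediate encodings:

     r0 = r0 & 0xffffff00   AND is never split, so the inverted constant
                            0xff is used and a single BIC suffices.
     r0 = 0xfffffffe        the inverted constant 1 is a valid immediate,
                            so a single MVN beats assembling the value
                            from positive pieces.  */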
5178
5179 /* Canonicalize a comparison so that we are more likely to recognize it.
5180 This can be done for a few constant compares, where we can make the
5181 immediate value easier to load. */
5182
5183 static void
5184 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5185 bool op0_preserve_value)
5186 {
5187 machine_mode mode;
5188 unsigned HOST_WIDE_INT i, maxval;
5189
5190 mode = GET_MODE (*op0);
5191 if (mode == VOIDmode)
5192 mode = GET_MODE (*op1);
5193
5194 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5195
5196 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5197 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5198 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5199 for GTU/LEU in Thumb mode. */
5200 if (mode == DImode)
5201 {
5202
5203 if (*code == GT || *code == LE
5204 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5205 {
5206 /* Missing comparison. First try to use an available
5207 comparison. */
5208 if (CONST_INT_P (*op1))
5209 {
5210 i = INTVAL (*op1);
5211 switch (*code)
5212 {
5213 case GT:
5214 case LE:
5215 if (i != maxval
5216 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5217 {
5218 *op1 = GEN_INT (i + 1);
5219 *code = *code == GT ? GE : LT;
5220 return;
5221 }
5222 break;
5223 case GTU:
5224 case LEU:
5225 if (i != ~((unsigned HOST_WIDE_INT) 0)
5226 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5227 {
5228 *op1 = GEN_INT (i + 1);
5229 *code = *code == GTU ? GEU : LTU;
5230 return;
5231 }
5232 break;
5233 default:
5234 gcc_unreachable ();
5235 }
5236 }
5237
5238 /* If that did not work, reverse the condition. */
5239 if (!op0_preserve_value)
5240 {
5241 std::swap (*op0, *op1);
5242 *code = (int)swap_condition ((enum rtx_code)*code);
5243 }
5244 }
5245 return;
5246 }
5247
5248 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5249 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5250 to facilitate possible combining with a cmp into 'ands'. */
5251 if (mode == SImode
5252 && GET_CODE (*op0) == ZERO_EXTEND
5253 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5254 && GET_MODE (XEXP (*op0, 0)) == QImode
5255 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5256 && subreg_lowpart_p (XEXP (*op0, 0))
5257 && *op1 == const0_rtx)
5258 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5259 GEN_INT (255));
5260
5261 /* Comparisons smaller than DImode. Only adjust comparisons against
5262 an out-of-range constant. */
5263 if (!CONST_INT_P (*op1)
5264 || const_ok_for_arm (INTVAL (*op1))
5265 || const_ok_for_arm (- INTVAL (*op1)))
5266 return;
5267
5268 i = INTVAL (*op1);
5269
5270 switch (*code)
5271 {
5272 case EQ:
5273 case NE:
5274 return;
5275
5276 case GT:
5277 case LE:
5278 if (i != maxval
5279 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5280 {
5281 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5282 *code = *code == GT ? GE : LT;
5283 return;
5284 }
5285 break;
5286
5287 case GE:
5288 case LT:
5289 if (i != ~maxval
5290 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5291 {
5292 *op1 = GEN_INT (i - 1);
5293 *code = *code == GE ? GT : LE;
5294 return;
5295 }
5296 break;
5297
5298 case GTU:
5299 case LEU:
5300 if (i != ~((unsigned HOST_WIDE_INT) 0)
5301 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5302 {
5303 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5304 *code = *code == GTU ? GEU : LTU;
5305 return;
5306 }
5307 break;
5308
5309 case GEU:
5310 case LTU:
5311 if (i != 0
5312 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5313 {
5314 *op1 = GEN_INT (i - 1);
5315 *code = *code == GEU ? GTU : LEU;
5316 return;
5317 }
5318 break;
5319
5320 default:
5321 gcc_unreachable ();
5322 }
5323 }
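/* Editorial illustration, not part of the original source: for SImode,
   (x > 0xfffff) is canonicalized to (x >= 0x100000), because neither
   0xfffff nor -0xfffff is a valid ARM immediate while 0x100000 is.  The
   DImode path applies the same GT->GE / LE->LT adjustment when the
   incremented constant can be built from two immediates.  */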
5324
5325
5326 /* Define how to find the value returned by a function. */
5327
5328 static rtx
5329 arm_function_value(const_tree type, const_tree func,
5330 bool outgoing ATTRIBUTE_UNUSED)
5331 {
5332 machine_mode mode;
5333 int unsignedp ATTRIBUTE_UNUSED;
5334 rtx r ATTRIBUTE_UNUSED;
5335
5336 mode = TYPE_MODE (type);
5337
5338 if (TARGET_AAPCS_BASED)
5339 return aapcs_allocate_return_reg (mode, type, func);
5340
5341 /* Promote integer types. */
5342 if (INTEGRAL_TYPE_P (type))
5343 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5344
5345 /* Promotes small structs returned in a register to full-word size
5346 for big-endian AAPCS. */
5347 if (arm_return_in_msb (type))
5348 {
5349 HOST_WIDE_INT size = int_size_in_bytes (type);
5350 if (size % UNITS_PER_WORD != 0)
5351 {
5352 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5353 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5354 }
5355 }
5356
5357 return arm_libcall_value_1 (mode);
5358 }
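/* Editorial note, not part of the original source: as a concrete instance
   of the widening above, a 6-byte structure accepted by arm_return_in_msb
   has its size rounded up to 8 bytes and is therefore returned in DImode,
   which is what places the padded value correctly for big-endian AAPCS.  */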
5359
5360 /* libcall hashtable helpers. */
5361
5362 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5363 {
5364 static inline hashval_t hash (const rtx_def *);
5365 static inline bool equal (const rtx_def *, const rtx_def *);
5366 static inline void remove (rtx_def *);
5367 };
5368
5369 inline bool
5370 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5371 {
5372 return rtx_equal_p (p1, p2);
5373 }
5374
5375 inline hashval_t
5376 libcall_hasher::hash (const rtx_def *p1)
5377 {
5378 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5379 }
5380
5381 typedef hash_table<libcall_hasher> libcall_table_type;
5382
5383 static void
5384 add_libcall (libcall_table_type *htab, rtx libcall)
5385 {
5386 *htab->find_slot (libcall, INSERT) = libcall;
5387 }
5388
5389 static bool
5390 arm_libcall_uses_aapcs_base (const_rtx libcall)
5391 {
5392 static bool init_done = false;
5393 static libcall_table_type *libcall_htab = NULL;
5394
5395 if (!init_done)
5396 {
5397 init_done = true;
5398
5399 libcall_htab = new libcall_table_type (31);
5400 add_libcall (libcall_htab,
5401 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5402 add_libcall (libcall_htab,
5403 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5404 add_libcall (libcall_htab,
5405 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5406 add_libcall (libcall_htab,
5407 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5408
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5411 add_libcall (libcall_htab,
5412 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5413 add_libcall (libcall_htab,
5414 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5415 add_libcall (libcall_htab,
5416 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5417
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5422 add_libcall (libcall_htab,
5423 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5424 add_libcall (libcall_htab,
5425 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5434
5435 /* Values from double-precision helper functions are returned in core
5436 registers if the selected core only supports single-precision
5437 arithmetic, even if we are using the hard-float ABI. The same is
5438 true for single-precision helpers, but we will never be using the
5439 hard-float ABI on a CPU which doesn't support single-precision
5440 operations in hardware. */
5441 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5442 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5443 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5444 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5452 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5453 SFmode));
5454 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5455 DFmode));
5456 add_libcall (libcall_htab,
5457 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5458 }
5459
5460 return libcall && libcall_htab->find (libcall) != NULL;
5461 }
5462
5463 static rtx
5464 arm_libcall_value_1 (machine_mode mode)
5465 {
5466 if (TARGET_AAPCS_BASED)
5467 return aapcs_libcall_value (mode);
5468 else if (TARGET_IWMMXT_ABI
5469 && arm_vector_mode_supported_p (mode))
5470 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5471 else
5472 return gen_rtx_REG (mode, ARG_REGISTER (1));
5473 }
5474
5475 /* Define how to find the value returned by a library function
5476 assuming the value has mode MODE. */
5477
5478 static rtx
5479 arm_libcall_value (machine_mode mode, const_rtx libcall)
5480 {
5481 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5482 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5483 {
5484 /* The following libcalls return their result in integer registers,
5485 even though they return a floating point value. */
5486 if (arm_libcall_uses_aapcs_base (libcall))
5487 return gen_rtx_REG (mode, ARG_REGISTER(1));
5488
5489 }
5490
5491 return arm_libcall_value_1 (mode);
5492 }
5493
5494 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5495
5496 static bool
5497 arm_function_value_regno_p (const unsigned int regno)
5498 {
5499 if (regno == ARG_REGISTER (1)
5500 || (TARGET_32BIT
5501 && TARGET_AAPCS_BASED
5502 && TARGET_HARD_FLOAT
5503 && regno == FIRST_VFP_REGNUM)
5504 || (TARGET_IWMMXT_ABI
5505 && regno == FIRST_IWMMXT_REGNUM))
5506 return true;
5507
5508 return false;
5509 }
5510
5511 /* Determine the amount of memory needed to store the possible return
5512 registers of an untyped call. */
5513 int
5514 arm_apply_result_size (void)
5515 {
5516 int size = 16;
5517
5518 if (TARGET_32BIT)
5519 {
5520 if (TARGET_HARD_FLOAT_ABI)
5521 size += 32;
5522 if (TARGET_IWMMXT_ABI)
5523 size += 8;
5524 }
5525
5526 return size;
5527 }
5528
5529 /* Decide whether TYPE should be returned in memory (true)
5530 or in a register (false). FNTYPE is the type of the function making
5531 the call. */
5532 static bool
5533 arm_return_in_memory (const_tree type, const_tree fntype)
5534 {
5535 HOST_WIDE_INT size;
5536
5537 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5538
5539 if (TARGET_AAPCS_BASED)
5540 {
5541       /* Simple, non-aggregate types (i.e. not including vectors and
5542 complex) are always returned in a register (or registers).
5543 We don't care about which register here, so we can short-cut
5544 some of the detail. */
5545 if (!AGGREGATE_TYPE_P (type)
5546 && TREE_CODE (type) != VECTOR_TYPE
5547 && TREE_CODE (type) != COMPLEX_TYPE)
5548 return false;
5549
5550 /* Any return value that is no larger than one word can be
5551 returned in r0. */
5552 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5553 return false;
5554
5555 /* Check any available co-processors to see if they accept the
5556 type as a register candidate (VFP, for example, can return
5557 some aggregates in consecutive registers). These aren't
5558 available if the call is variadic. */
5559 if (aapcs_select_return_coproc (type, fntype) >= 0)
5560 return false;
5561
5562 /* Vector values should be returned using ARM registers, not
5563 memory (unless they're over 16 bytes, which will break since
5564 we only have four call-clobbered registers to play with). */
5565 if (TREE_CODE (type) == VECTOR_TYPE)
5566 return (size < 0 || size > (4 * UNITS_PER_WORD));
5567
5568 /* The rest go in memory. */
5569 return true;
5570 }
5571
5572 if (TREE_CODE (type) == VECTOR_TYPE)
5573 return (size < 0 || size > (4 * UNITS_PER_WORD));
5574
5575 if (!AGGREGATE_TYPE_P (type) &&
5576 (TREE_CODE (type) != VECTOR_TYPE))
5577 /* All simple types are returned in registers. */
5578 return false;
5579
5580 if (arm_abi != ARM_ABI_APCS)
5581 {
5582 /* ATPCS and later return aggregate types in memory only if they are
5583 larger than a word (or are variable size). */
5584 return (size < 0 || size > UNITS_PER_WORD);
5585 }
5586
5587 /* For the arm-wince targets we choose to be compatible with Microsoft's
5588 ARM and Thumb compilers, which always return aggregates in memory. */
5589 #ifndef ARM_WINCE
5590 /* All structures/unions bigger than one word are returned in memory.
5591 Also catch the case where int_size_in_bytes returns -1. In this case
5592 the aggregate is either huge or of variable size, and in either case
5593 we will want to return it via memory and not in a register. */
5594 if (size < 0 || size > UNITS_PER_WORD)
5595 return true;
5596
5597 if (TREE_CODE (type) == RECORD_TYPE)
5598 {
5599 tree field;
5600
5601 /* For a struct the APCS says that we only return in a register
5602 if the type is 'integer like' and every addressable element
5603 has an offset of zero. For practical purposes this means
5604 that the structure can have at most one non bit-field element
5605 and that this element must be the first one in the structure. */
5606
5607 /* Find the first field, ignoring non FIELD_DECL things which will
5608 have been created by C++. */
5609 for (field = TYPE_FIELDS (type);
5610 field && TREE_CODE (field) != FIELD_DECL;
5611 field = DECL_CHAIN (field))
5612 continue;
5613
5614 if (field == NULL)
5615 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5616
5617 /* Check that the first field is valid for returning in a register. */
5618
5619 /* ... Floats are not allowed */
5620 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5621 return true;
5622
5623 /* ... Aggregates that are not themselves valid for returning in
5624 a register are not allowed. */
5625 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5626 return true;
5627
5628 /* Now check the remaining fields, if any. Only bitfields are allowed,
5629 since they are not addressable. */
5630 for (field = DECL_CHAIN (field);
5631 field;
5632 field = DECL_CHAIN (field))
5633 {
5634 if (TREE_CODE (field) != FIELD_DECL)
5635 continue;
5636
5637 if (!DECL_BIT_FIELD_TYPE (field))
5638 return true;
5639 }
5640
5641 return false;
5642 }
5643
5644 if (TREE_CODE (type) == UNION_TYPE)
5645 {
5646 tree field;
5647
5648 /* Unions can be returned in registers if every element is
5649 integral, or can be returned in an integer register. */
5650 for (field = TYPE_FIELDS (type);
5651 field;
5652 field = DECL_CHAIN (field))
5653 {
5654 if (TREE_CODE (field) != FIELD_DECL)
5655 continue;
5656
5657 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5658 return true;
5659
5660 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5661 return true;
5662 }
5663
5664 return false;
5665 }
5666 #endif /* not ARM_WINCE */
5667
5668 /* Return all other types in memory. */
5669 return true;
5670 }
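/* Editorial illustration, not part of the original source: some concrete
   outcomes of the AAPCS branch above on a 32-bit target:

     struct { int a; }       4 bytes, fits in r0                 -> false
     struct { int a, b; }    8 bytes, no co-processor claims it  -> true
     struct { float x, y; }  with the VFP PCS variant the co-processor
                             check accepts it (returned in s0/s1) -> false;
                             with the base variant it is 8 bytes and goes
                             to memory like any other large aggregate.  */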
5671
5672 const struct pcs_attribute_arg
5673 {
5674 const char *arg;
5675 enum arm_pcs value;
5676 } pcs_attribute_args[] =
5677 {
5678 {"aapcs", ARM_PCS_AAPCS},
5679 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5680 #if 0
5681 /* We could recognize these, but changes would be needed elsewhere
5682 * to implement them. */
5683 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5684 {"atpcs", ARM_PCS_ATPCS},
5685 {"apcs", ARM_PCS_APCS},
5686 #endif
5687 {NULL, ARM_PCS_UNKNOWN}
5688 };
5689
5690 static enum arm_pcs
5691 arm_pcs_from_attribute (tree attr)
5692 {
5693 const struct pcs_attribute_arg *ptr;
5694 const char *arg;
5695
5696 /* Get the value of the argument. */
5697 if (TREE_VALUE (attr) == NULL_TREE
5698 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5699 return ARM_PCS_UNKNOWN;
5700
5701 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5702
5703 /* Check it against the list of known arguments. */
5704 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5705 if (streq (arg, ptr->arg))
5706 return ptr->value;
5707
5708   /* An unrecognized PCS variant.  */
5709 return ARM_PCS_UNKNOWN;
5710 }
5711
5712 /* Get the PCS variant to use for this call. TYPE is the function's type
5713    specification, DECL is the specific declaration. DECL may be null if
5714 the call could be indirect or if this is a library call. */
5715 static enum arm_pcs
5716 arm_get_pcs_model (const_tree type, const_tree decl)
5717 {
5718 bool user_convention = false;
5719 enum arm_pcs user_pcs = arm_pcs_default;
5720 tree attr;
5721
5722 gcc_assert (type);
5723
5724 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5725 if (attr)
5726 {
5727 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5728 user_convention = true;
5729 }
5730
5731 if (TARGET_AAPCS_BASED)
5732 {
5733 /* Detect varargs functions. These always use the base rules
5734 (no argument is ever a candidate for a co-processor
5735 register). */
5736 bool base_rules = stdarg_p (type);
5737
5738 if (user_convention)
5739 {
5740 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5741 sorry ("non-AAPCS derived PCS variant");
5742 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5743 error ("variadic functions must use the base AAPCS variant");
5744 }
5745
5746 if (base_rules)
5747 return ARM_PCS_AAPCS;
5748 else if (user_convention)
5749 return user_pcs;
5750 else if (decl && flag_unit_at_a_time)
5751 {
5752 /* Local functions never leak outside this compilation unit,
5753 so we are free to use whatever conventions are
5754 appropriate. */
5755 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5756 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5757 if (i && i->local)
5758 return ARM_PCS_AAPCS_LOCAL;
5759 }
5760 }
5761 else if (user_convention && user_pcs != arm_pcs_default)
5762 sorry ("PCS variant");
5763
5764 /* For everything else we use the target's default. */
5765 return arm_pcs_default;
5766 }
5767
5768
5769 static void
5770 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5771 const_tree fntype ATTRIBUTE_UNUSED,
5772 rtx libcall ATTRIBUTE_UNUSED,
5773 const_tree fndecl ATTRIBUTE_UNUSED)
5774 {
5775 /* Record the unallocated VFP registers. */
5776 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5777 pcum->aapcs_vfp_reg_alloc = 0;
5778 }
5779
5780 /* Walk down the type tree of TYPE counting consecutive base elements.
5781 If *MODEP is VOIDmode, then set it to the first valid floating point
5782 type. If a non-floating point type is found, or if a floating point
5783 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5784 otherwise return the count in the sub-tree. */
5785 static int
5786 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5787 {
5788 machine_mode mode;
5789 HOST_WIDE_INT size;
5790
5791 switch (TREE_CODE (type))
5792 {
5793 case REAL_TYPE:
5794 mode = TYPE_MODE (type);
5795 if (mode != DFmode && mode != SFmode && mode != HFmode)
5796 return -1;
5797
5798 if (*modep == VOIDmode)
5799 *modep = mode;
5800
5801 if (*modep == mode)
5802 return 1;
5803
5804 break;
5805
5806 case COMPLEX_TYPE:
5807 mode = TYPE_MODE (TREE_TYPE (type));
5808 if (mode != DFmode && mode != SFmode)
5809 return -1;
5810
5811 if (*modep == VOIDmode)
5812 *modep = mode;
5813
5814 if (*modep == mode)
5815 return 2;
5816
5817 break;
5818
5819 case VECTOR_TYPE:
5820 /* Use V2SImode and V4SImode as representatives of all 64-bit
5821 and 128-bit vector types, whether or not those modes are
5822 supported with the present options. */
5823 size = int_size_in_bytes (type);
5824 switch (size)
5825 {
5826 case 8:
5827 mode = V2SImode;
5828 break;
5829 case 16:
5830 mode = V4SImode;
5831 break;
5832 default:
5833 return -1;
5834 }
5835
5836 if (*modep == VOIDmode)
5837 *modep = mode;
5838
5839 /* Vector modes are considered to be opaque: two vectors are
5840 equivalent for the purposes of being homogeneous aggregates
5841 if they are the same size. */
5842 if (*modep == mode)
5843 return 1;
5844
5845 break;
5846
5847 case ARRAY_TYPE:
5848 {
5849 int count;
5850 tree index = TYPE_DOMAIN (type);
5851
5852 /* Can't handle incomplete types nor sizes that are not
5853 fixed. */
5854 if (!COMPLETE_TYPE_P (type)
5855 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5856 return -1;
5857
5858 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5859 if (count == -1
5860 || !index
5861 || !TYPE_MAX_VALUE (index)
5862 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5863 || !TYPE_MIN_VALUE (index)
5864 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5865 || count < 0)
5866 return -1;
5867
5868 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5869 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5870
5871 /* There must be no padding. */
5872 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5873 return -1;
5874
5875 return count;
5876 }
5877
5878 case RECORD_TYPE:
5879 {
5880 int count = 0;
5881 int sub_count;
5882 tree field;
5883
5884 /* Can't handle incomplete types nor sizes that are not
5885 fixed. */
5886 if (!COMPLETE_TYPE_P (type)
5887 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5888 return -1;
5889
5890 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5891 {
5892 if (TREE_CODE (field) != FIELD_DECL)
5893 continue;
5894
5895 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5896 if (sub_count < 0)
5897 return -1;
5898 count += sub_count;
5899 }
5900
5901 /* There must be no padding. */
5902 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5903 return -1;
5904
5905 return count;
5906 }
5907
5908 case UNION_TYPE:
5909 case QUAL_UNION_TYPE:
5910 {
5911 /* These aren't very interesting except in a degenerate case. */
5912 int count = 0;
5913 int sub_count;
5914 tree field;
5915
5916 /* Can't handle incomplete types nor sizes that are not
5917 fixed. */
5918 if (!COMPLETE_TYPE_P (type)
5919 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5920 return -1;
5921
5922 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5923 {
5924 if (TREE_CODE (field) != FIELD_DECL)
5925 continue;
5926
5927 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5928 if (sub_count < 0)
5929 return -1;
5930 count = count > sub_count ? count : sub_count;
5931 }
5932
5933 /* There must be no padding. */
5934 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5935 return -1;
5936
5937 return count;
5938 }
5939
5940 default:
5941 break;
5942 }
5943
5944 return -1;
5945 }
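/* Editorial illustration, not part of the original source: sample results
   of the homogeneous-aggregate walk above:

     struct { float x, y, z; }       *modep = SFmode, returns 3
     double d[2]                     *modep = DFmode, returns 2
     struct { float f; double d; }   returns -1 (mixed base types)
     struct { float f; int i; }      returns -1 (non-FP member)

   Only the first two are candidates for VFP argument/return registers.  */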
5946
5947 /* Return true if PCS_VARIANT should use VFP registers. */
5948 static bool
5949 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5950 {
5951 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5952 {
5953 static bool seen_thumb1_vfp = false;
5954
5955 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5956 {
5957 sorry ("Thumb-1 hard-float VFP ABI");
5958 /* sorry() is not immediately fatal, so only display this once. */
5959 seen_thumb1_vfp = true;
5960 }
5961
5962 return true;
5963 }
5964
5965 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5966 return false;
5967
5968 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5969 (TARGET_VFP_DOUBLE || !is_double));
5970 }
5971
5972 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5973 suitable for passing or returning in VFP registers for the PCS
5974 variant selected. If it is, then *BASE_MODE is updated to contain
5975 a machine mode describing each element of the argument's type and
5976 *COUNT to hold the number of such elements. */
5977 static bool
5978 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5979 machine_mode mode, const_tree type,
5980 machine_mode *base_mode, int *count)
5981 {
5982 machine_mode new_mode = VOIDmode;
5983
5984 /* If we have the type information, prefer that to working things
5985 out from the mode. */
5986 if (type)
5987 {
5988 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5989
5990 if (ag_count > 0 && ag_count <= 4)
5991 *count = ag_count;
5992 else
5993 return false;
5994 }
5995 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5996 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5997 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5998 {
5999 *count = 1;
6000 new_mode = mode;
6001 }
6002 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6003 {
6004 *count = 2;
6005 new_mode = (mode == DCmode ? DFmode : SFmode);
6006 }
6007 else
6008 return false;
6009
6010
6011 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6012 return false;
6013
6014 *base_mode = new_mode;
6015 return true;
6016 }
6017
6018 static bool
6019 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6020 machine_mode mode, const_tree type)
6021 {
6022 int count ATTRIBUTE_UNUSED;
6023 machine_mode ag_mode ATTRIBUTE_UNUSED;
6024
6025 if (!use_vfp_abi (pcs_variant, false))
6026 return false;
6027 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6028 &ag_mode, &count);
6029 }
6030
6031 static bool
6032 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6033 const_tree type)
6034 {
6035 if (!use_vfp_abi (pcum->pcs_variant, false))
6036 return false;
6037
6038 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6039 &pcum->aapcs_vfp_rmode,
6040 &pcum->aapcs_vfp_rcount);
6041 }
6042
6043 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6044 for the behaviour of this function. */
6045
6046 static bool
6047 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6048 const_tree type ATTRIBUTE_UNUSED)
6049 {
6050 int rmode_size
6051 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6052 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6053 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6054 int regno;
6055
6056 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6057 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6058 {
6059 pcum->aapcs_vfp_reg_alloc = mask << regno;
6060 if (mode == BLKmode
6061 || (mode == TImode && ! TARGET_NEON)
6062 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6063 {
6064 int i;
6065 int rcount = pcum->aapcs_vfp_rcount;
6066 int rshift = shift;
6067 machine_mode rmode = pcum->aapcs_vfp_rmode;
6068 rtx par;
6069 if (!TARGET_NEON)
6070 {
6071 /* Avoid using unsupported vector modes. */
6072 if (rmode == V2SImode)
6073 rmode = DImode;
6074 else if (rmode == V4SImode)
6075 {
6076 rmode = DImode;
6077 rcount *= 2;
6078 rshift /= 2;
6079 }
6080 }
6081 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6082 for (i = 0; i < rcount; i++)
6083 {
6084 rtx tmp = gen_rtx_REG (rmode,
6085 FIRST_VFP_REGNUM + regno + i * rshift);
6086 tmp = gen_rtx_EXPR_LIST
6087 (VOIDmode, tmp,
6088 GEN_INT (i * GET_MODE_SIZE (rmode)));
6089 XVECEXP (par, 0, i) = tmp;
6090 }
6091
6092 pcum->aapcs_reg = par;
6093 }
6094 else
6095 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6096 return true;
6097 }
6098 return false;
6099 }
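/* Editorial illustration, not part of the original source: how the
   mask/shift arithmetic above behaves under the VFP PCS variant:

     float            rmode SFmode, shift 1, mask 0b1    lowest free s reg
     double           rmode DFmode, shift 2, mask 0b11   lowest free d reg
     HFA of 3 floats  rmode SFmode, shift 1, mask 0b111  3 consecutive s regs

   E.g. for f (float a, double b, float c): a takes s0, b skips the busy s0
   and lands in d1 (s2/s3), and c back-fills the still-free s1.  */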
6100
6101 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6102 comment there for the behaviour of this function. */
6103
6104 static rtx
6105 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6106 machine_mode mode,
6107 const_tree type ATTRIBUTE_UNUSED)
6108 {
6109 if (!use_vfp_abi (pcs_variant, false))
6110 return NULL;
6111
6112 if (mode == BLKmode
6113 || (GET_MODE_CLASS (mode) == MODE_INT
6114 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6115 && !TARGET_NEON))
6116 {
6117 int count;
6118 machine_mode ag_mode;
6119 int i;
6120 rtx par;
6121 int shift;
6122
6123 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6124 &ag_mode, &count);
6125
6126 if (!TARGET_NEON)
6127 {
6128 if (ag_mode == V2SImode)
6129 ag_mode = DImode;
6130 else if (ag_mode == V4SImode)
6131 {
6132 ag_mode = DImode;
6133 count *= 2;
6134 }
6135 }
6136 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6137 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6138 for (i = 0; i < count; i++)
6139 {
6140 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6141 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6142 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6143 XVECEXP (par, 0, i) = tmp;
6144 }
6145
6146 return par;
6147 }
6148
6149 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6150 }
6151
6152 static void
6153 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6154 machine_mode mode ATTRIBUTE_UNUSED,
6155 const_tree type ATTRIBUTE_UNUSED)
6156 {
6157 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6158 pcum->aapcs_vfp_reg_alloc = 0;
6159 return;
6160 }
6161
6162 #define AAPCS_CP(X) \
6163 { \
6164 aapcs_ ## X ## _cum_init, \
6165 aapcs_ ## X ## _is_call_candidate, \
6166 aapcs_ ## X ## _allocate, \
6167 aapcs_ ## X ## _is_return_candidate, \
6168 aapcs_ ## X ## _allocate_return_reg, \
6169 aapcs_ ## X ## _advance \
6170 }
6171
6172 /* Table of co-processors that can be used to pass arguments in
6173    registers. Ideally no argument should be a candidate for more than
6174 one co-processor table entry, but the table is processed in order
6175 and stops after the first match. If that entry then fails to put
6176 the argument into a co-processor register, the argument will go on
6177 the stack. */
6178 static struct
6179 {
6180 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6181 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6182
6183 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6184 BLKmode) is a candidate for this co-processor's registers; this
6185 function should ignore any position-dependent state in
6186 CUMULATIVE_ARGS and only use call-type dependent information. */
6187 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6188
6189 /* Return true if the argument does get a co-processor register; it
6190 should set aapcs_reg to an RTX of the register allocated as is
6191 required for a return from FUNCTION_ARG. */
6192 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6193
6194 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6195 be returned in this co-processor's registers. */
6196 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6197
6198 /* Allocate and return an RTX element to hold the return type of a call. This
6199 routine must not fail and will only be called if is_return_candidate
6200 returned true with the same parameters. */
6201 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6202
6203 /* Finish processing this argument and prepare to start processing
6204 the next one. */
6205 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6206 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6207 {
6208 AAPCS_CP(vfp)
6209 };
6210
6211 #undef AAPCS_CP
6212
6213 static int
6214 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6215 const_tree type)
6216 {
6217 int i;
6218
6219 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6220 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6221 return i;
6222
6223 return -1;
6224 }
6225
6226 static int
6227 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6228 {
6229 /* We aren't passed a decl, so we can't check that a call is local.
6230 However, it isn't clear that that would be a win anyway, since it
6231 might limit some tail-calling opportunities. */
6232 enum arm_pcs pcs_variant;
6233
6234 if (fntype)
6235 {
6236 const_tree fndecl = NULL_TREE;
6237
6238 if (TREE_CODE (fntype) == FUNCTION_DECL)
6239 {
6240 fndecl = fntype;
6241 fntype = TREE_TYPE (fntype);
6242 }
6243
6244 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6245 }
6246 else
6247 pcs_variant = arm_pcs_default;
6248
6249 if (pcs_variant != ARM_PCS_AAPCS)
6250 {
6251 int i;
6252
6253 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6254 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6255 TYPE_MODE (type),
6256 type))
6257 return i;
6258 }
6259 return -1;
6260 }
6261
6262 static rtx
6263 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6264 const_tree fntype)
6265 {
6266 /* We aren't passed a decl, so we can't check that a call is local.
6267 However, it isn't clear that that would be a win anyway, since it
6268 might limit some tail-calling opportunities. */
6269 enum arm_pcs pcs_variant;
6270 int unsignedp ATTRIBUTE_UNUSED;
6271
6272 if (fntype)
6273 {
6274 const_tree fndecl = NULL_TREE;
6275
6276 if (TREE_CODE (fntype) == FUNCTION_DECL)
6277 {
6278 fndecl = fntype;
6279 fntype = TREE_TYPE (fntype);
6280 }
6281
6282 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6283 }
6284 else
6285 pcs_variant = arm_pcs_default;
6286
6287 /* Promote integer types. */
6288 if (type && INTEGRAL_TYPE_P (type))
6289 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6290
6291 if (pcs_variant != ARM_PCS_AAPCS)
6292 {
6293 int i;
6294
6295 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6296 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6297 type))
6298 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6299 mode, type);
6300 }
6301
6302 /* Promotes small structs returned in a register to full-word size
6303 for big-endian AAPCS. */
6304 if (type && arm_return_in_msb (type))
6305 {
6306 HOST_WIDE_INT size = int_size_in_bytes (type);
6307 if (size % UNITS_PER_WORD != 0)
6308 {
6309 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6310 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6311 }
6312 }
6313
6314 return gen_rtx_REG (mode, R0_REGNUM);
6315 }
6316
6317 static rtx
6318 aapcs_libcall_value (machine_mode mode)
6319 {
6320 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6321 && GET_MODE_SIZE (mode) <= 4)
6322 mode = SImode;
6323
6324 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6325 }
6326
6327 /* Lay out a function argument using the AAPCS rules. The rule
6328 numbers referred to here are those in the AAPCS. */
6329 static void
6330 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6331 const_tree type, bool named)
6332 {
6333 int nregs, nregs2;
6334 int ncrn;
6335
6336 /* We only need to do this once per argument. */
6337 if (pcum->aapcs_arg_processed)
6338 return;
6339
6340 pcum->aapcs_arg_processed = true;
6341
6342 /* Special case: if named is false then we are handling an incoming
6343 anonymous argument which is on the stack. */
6344 if (!named)
6345 return;
6346
6347 /* Is this a potential co-processor register candidate? */
6348 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6349 {
6350 int slot = aapcs_select_call_coproc (pcum, mode, type);
6351 pcum->aapcs_cprc_slot = slot;
6352
6353 /* We don't have to apply any of the rules from part B of the
6354 preparation phase, these are handled elsewhere in the
6355 compiler. */
6356
6357 if (slot >= 0)
6358 {
6359 /* A Co-processor register candidate goes either in its own
6360 class of registers or on the stack. */
6361 if (!pcum->aapcs_cprc_failed[slot])
6362 {
6363 /* C1.cp - Try to allocate the argument to co-processor
6364 registers. */
6365 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6366 return;
6367
6368 /* C2.cp - Put the argument on the stack and note that we
6369 can't assign any more candidates in this slot. We also
6370 need to note that we have allocated stack space, so that
6371 we won't later try to split a non-cprc candidate between
6372 core registers and the stack. */
6373 pcum->aapcs_cprc_failed[slot] = true;
6374 pcum->can_split = false;
6375 }
6376
6377 /* We didn't get a register, so this argument goes on the
6378 stack. */
6379 gcc_assert (pcum->can_split == false);
6380 return;
6381 }
6382 }
6383
6384 /* C3 - For double-word aligned arguments, round the NCRN up to the
6385 next even number. */
6386 ncrn = pcum->aapcs_ncrn;
6387 if (ncrn & 1)
6388 {
6389 int res = arm_needs_doubleword_align (mode, type);
6390 /* Only warn during RTL expansion of call stmts, otherwise we would
6391 warn e.g. during gimplification even on functions that will be
6392 always inlined, and we'd warn multiple times. Don't warn when
6393 called in expand_function_start either, as we warn instead in
6394 arm_function_arg_boundary in that case. */
6395 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6396 inform (input_location, "parameter passing for argument of type "
6397 "%qT changed in GCC 7.1", type);
6398 else if (res > 0)
6399 ncrn++;
6400 }
6401
6402 nregs = ARM_NUM_REGS2(mode, type);
6403
6404 /* Sigh, this test should really assert that nregs > 0, but a GCC
6405 extension allows empty structs and then gives them empty size; it
6406 then allows such a structure to be passed by value. For some of
6407 the code below we have to pretend that such an argument has
6408 non-zero size so that we 'locate' it correctly either in
6409 registers or on the stack. */
6410 gcc_assert (nregs >= 0);
6411
6412 nregs2 = nregs ? nregs : 1;
6413
6414 /* C4 - Argument fits entirely in core registers. */
6415 if (ncrn + nregs2 <= NUM_ARG_REGS)
6416 {
6417 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6418 pcum->aapcs_next_ncrn = ncrn + nregs;
6419 return;
6420 }
6421
6422 /* C5 - Some core registers left and there are no arguments already
6423 on the stack: split this argument between the remaining core
6424 registers and the stack. */
6425 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6426 {
6427 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6428 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6429 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6430 return;
6431 }
6432
6433 /* C6 - NCRN is set to 4. */
6434 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6435
6436   /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6437 return;
6438 }
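/* Editorial illustration, not part of the original source: the C3-C7 rules
   above applied to two prototypes, assuming the base (integer) variant so
   that no co-processor slot claims the arguments:

     void f (int a, double b);
       a -> r0 (ncrn 0 -> 1); b needs doubleword alignment, so C3 rounds
       ncrn up to 2 and C4 places it in r2+r3.

     void g (int a, int b, long long c, int d);
       a -> r0, b -> r1, c -> r2+r3 (C4); no core register is left for d
       and ncrn is already 4, so C6/C7 put it on the stack.  */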
6439
6440 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6441 for a call to a function whose data type is FNTYPE.
6442 For a library call, FNTYPE is NULL. */
6443 void
6444 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6445 rtx libname,
6446 tree fndecl ATTRIBUTE_UNUSED)
6447 {
6448 /* Long call handling. */
6449 if (fntype)
6450 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6451 else
6452 pcum->pcs_variant = arm_pcs_default;
6453
6454 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6455 {
6456 if (arm_libcall_uses_aapcs_base (libname))
6457 pcum->pcs_variant = ARM_PCS_AAPCS;
6458
6459 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6460 pcum->aapcs_reg = NULL_RTX;
6461 pcum->aapcs_partial = 0;
6462 pcum->aapcs_arg_processed = false;
6463 pcum->aapcs_cprc_slot = -1;
6464 pcum->can_split = true;
6465
6466 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6467 {
6468 int i;
6469
6470 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6471 {
6472 pcum->aapcs_cprc_failed[i] = false;
6473 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6474 }
6475 }
6476 return;
6477 }
6478
6479 /* Legacy ABIs */
6480
6481 /* On the ARM, the offset starts at 0. */
6482 pcum->nregs = 0;
6483 pcum->iwmmxt_nregs = 0;
6484 pcum->can_split = true;
6485
6486 /* Varargs vectors are treated the same as long long.
6487 named_count avoids having to change the way arm handles 'named' */
6488 pcum->named_count = 0;
6489 pcum->nargs = 0;
6490
6491 if (TARGET_REALLY_IWMMXT && fntype)
6492 {
6493 tree fn_arg;
6494
6495 for (fn_arg = TYPE_ARG_TYPES (fntype);
6496 fn_arg;
6497 fn_arg = TREE_CHAIN (fn_arg))
6498 pcum->named_count += 1;
6499
6500 if (! pcum->named_count)
6501 pcum->named_count = INT_MAX;
6502 }
6503 }
6504
6505 /* Return 1 if double word alignment is required for argument passing.
6506 Return -1 if double word alignment used to be required for argument
6507 passing before PR77728 ABI fix, but is not required anymore.
6508    Return 0 if double word alignment is not required and wasn't required
6509 before either. */
6510 static int
6511 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6512 {
6513 if (!type)
6514 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6515
6516 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6517 if (!AGGREGATE_TYPE_P (type))
6518 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6519
6520 /* Array types: Use member alignment of element type. */
6521 if (TREE_CODE (type) == ARRAY_TYPE)
6522 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6523
6524 int ret = 0;
6525 /* Record/aggregate types: Use greatest member alignment of any member. */
6526 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6528 {
6529 if (TREE_CODE (field) == FIELD_DECL)
6530 return 1;
6531 else
6532 /* Before PR77728 fix, we were incorrectly considering also
6533 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6534 Make sure we can warn about that with -Wpsabi. */
6535 ret = -1;
6536 }
6537
6538 return ret;
6539 }
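/* Editorial illustration, not part of the original source: typical results
   of the check above when PARM_BOUNDARY is 32:

     DImode/DFmode with no type            1
     struct { long long x; }               1  (FIELD_DECL aligned to 64 bits)
     struct { int a, b; }                  0
     a C++ struct whose only 64-bit-aligned entry in TYPE_FIELDS is a
     static data member                   -1  (pre-PR77728 behaviour, now
                                               only diagnosed with -Wpsabi)  */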
6540
6541
6542 /* Determine where to put an argument to a function.
6543 Value is zero to push the argument on the stack,
6544 or a hard register in which to store the argument.
6545
6546 MODE is the argument's machine mode.
6547 TYPE is the data type of the argument (as a tree).
6548 This is null for libcalls where that information may
6549 not be available.
6550 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6551 the preceding args and about the function being called.
6552 NAMED is nonzero if this argument is a named parameter
6553 (otherwise it is an extra parameter matching an ellipsis).
6554
6555 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6556 other arguments are passed on the stack. If (NAMED == 0) (which happens
6557 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6558 defined), say it is passed in the stack (function_prologue will
6559 indeed make it pass in the stack if necessary). */
6560
6561 static rtx
6562 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6563 const_tree type, bool named)
6564 {
6565 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6566 int nregs;
6567
6568 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6569 a call insn (op3 of a call_value insn). */
6570 if (mode == VOIDmode)
6571 return const0_rtx;
6572
6573 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6574 {
6575 aapcs_layout_arg (pcum, mode, type, named);
6576 return pcum->aapcs_reg;
6577 }
6578
6579 /* Varargs vectors are treated the same as long long.
6580 named_count avoids having to change the way arm handles 'named' */
6581 if (TARGET_IWMMXT_ABI
6582 && arm_vector_mode_supported_p (mode)
6583 && pcum->named_count > pcum->nargs + 1)
6584 {
6585 if (pcum->iwmmxt_nregs <= 9)
6586 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6587 else
6588 {
6589 pcum->can_split = false;
6590 return NULL_RTX;
6591 }
6592 }
6593
6594 /* Put doubleword aligned quantities in even register pairs. */
6595 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6596 {
6597 int res = arm_needs_doubleword_align (mode, type);
6598 if (res < 0 && warn_psabi)
6599 inform (input_location, "parameter passing for argument of type "
6600 "%qT changed in GCC 7.1", type);
6601 else if (res > 0)
6602 pcum->nregs++;
6603 }
6604
6605 /* Only allow splitting an arg between regs and memory if all preceding
6606 args were allocated to regs. For args passed by reference we only count
6607 the reference pointer. */
6608 if (pcum->can_split)
6609 nregs = 1;
6610 else
6611 nregs = ARM_NUM_REGS2 (mode, type);
6612
6613 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6614 return NULL_RTX;
6615
6616 return gen_rtx_REG (mode, pcum->nregs);
6617 }
6618
6619 static unsigned int
6620 arm_function_arg_boundary (machine_mode mode, const_tree type)
6621 {
6622 if (!ARM_DOUBLEWORD_ALIGN)
6623 return PARM_BOUNDARY;
6624
6625 int res = arm_needs_doubleword_align (mode, type);
6626 if (res < 0 && warn_psabi)
6627 inform (input_location, "parameter passing for argument of type %qT "
6628 "changed in GCC 7.1", type);
6629
6630 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6631 }
6632
6633 static int
6634 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6635 tree type, bool named)
6636 {
6637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6638 int nregs = pcum->nregs;
6639
6640 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6641 {
6642 aapcs_layout_arg (pcum, mode, type, named);
6643 return pcum->aapcs_partial;
6644 }
6645
6646 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6647 return 0;
6648
6649 if (NUM_ARG_REGS > nregs
6650 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6651 && pcum->can_split)
6652 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6653
6654 return 0;
6655 }
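/* Editorial illustration, not part of the original source: with three
   integer arguments already occupying r0-r2, an 8-byte argument that may
   be split gets (NUM_ARG_REGS - 3) * UNITS_PER_WORD = 4 bytes in r3 and
   the rest on the stack, so this hook returns 4; the AAPCS path reports
   the same situation through pcum->aapcs_partial (rule C5).  */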
6656
6657 /* Update the data in PCUM to advance over an argument
6658 of mode MODE and data type TYPE.
6659 (TYPE is null for libcalls where that information may not be available.) */
6660
6661 static void
6662 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6663 const_tree type, bool named)
6664 {
6665 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6666
6667 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6668 {
6669 aapcs_layout_arg (pcum, mode, type, named);
6670
6671 if (pcum->aapcs_cprc_slot >= 0)
6672 {
6673 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6674 type);
6675 pcum->aapcs_cprc_slot = -1;
6676 }
6677
6678 /* Generic stuff. */
6679 pcum->aapcs_arg_processed = false;
6680 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6681 pcum->aapcs_reg = NULL_RTX;
6682 pcum->aapcs_partial = 0;
6683 }
6684 else
6685 {
6686 pcum->nargs += 1;
6687 if (arm_vector_mode_supported_p (mode)
6688 && pcum->named_count > pcum->nargs
6689 && TARGET_IWMMXT_ABI)
6690 pcum->iwmmxt_nregs += 1;
6691 else
6692 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6693 }
6694 }
6695
6696 /* Variable sized types are passed by reference. This is a GCC
6697 extension to the ARM ABI. */
6698
6699 static bool
6700 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6701 machine_mode mode ATTRIBUTE_UNUSED,
6702 const_tree type, bool named ATTRIBUTE_UNUSED)
6703 {
6704 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6705 }
6706 \f
6707 /* Encode the current state of the #pragma [no_]long_calls. */
6708 typedef enum
6709 {
6710 OFF, /* No #pragma [no_]long_calls is in effect. */
6711 LONG, /* #pragma long_calls is in effect. */
6712 SHORT /* #pragma no_long_calls is in effect. */
6713 } arm_pragma_enum;
6714
6715 static arm_pragma_enum arm_pragma_long_calls = OFF;
6716
6717 void
6718 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6719 {
6720 arm_pragma_long_calls = LONG;
6721 }
6722
6723 void
6724 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6725 {
6726 arm_pragma_long_calls = SHORT;
6727 }
6728
6729 void
6730 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6731 {
6732 arm_pragma_long_calls = OFF;
6733 }
6734 \f
6735 /* Handle an attribute requiring a FUNCTION_DECL;
6736 arguments as in struct attribute_spec.handler. */
6737 static tree
6738 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6740 {
6741 if (TREE_CODE (*node) != FUNCTION_DECL)
6742 {
6743 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6744 name);
6745 *no_add_attrs = true;
6746 }
6747
6748 return NULL_TREE;
6749 }
6750
6751 /* Handle an "interrupt" or "isr" attribute;
6752 arguments as in struct attribute_spec.handler. */
6753 static tree
6754 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6755 bool *no_add_attrs)
6756 {
6757 if (DECL_P (*node))
6758 {
6759 if (TREE_CODE (*node) != FUNCTION_DECL)
6760 {
6761 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6762 name);
6763 *no_add_attrs = true;
6764 }
6765 /* FIXME: the argument if any is checked for type attributes;
6766 should it be checked for decl ones? */
6767 }
6768 else
6769 {
6770 if (TREE_CODE (*node) == FUNCTION_TYPE
6771 || TREE_CODE (*node) == METHOD_TYPE)
6772 {
6773 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6774 {
6775 warning (OPT_Wattributes, "%qE attribute ignored",
6776 name);
6777 *no_add_attrs = true;
6778 }
6779 }
6780 else if (TREE_CODE (*node) == POINTER_TYPE
6781 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6782 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6783 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6784 {
6785 *node = build_variant_type_copy (*node);
6786 TREE_TYPE (*node) = build_type_attribute_variant
6787 (TREE_TYPE (*node),
6788 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6789 *no_add_attrs = true;
6790 }
6791 else
6792 {
6793 /* Possibly pass this attribute on from the type to a decl. */
6794 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6795 | (int) ATTR_FLAG_FUNCTION_NEXT
6796 | (int) ATTR_FLAG_ARRAY_NEXT))
6797 {
6798 *no_add_attrs = true;
6799 return tree_cons (name, args, NULL_TREE);
6800 }
6801 else
6802 {
6803 warning (OPT_Wattributes, "%qE attribute ignored",
6804 name);
6805 }
6806 }
6807 }
6808
6809 return NULL_TREE;
6810 }
6811
6812 /* Handle a "pcs" attribute; arguments as in struct
6813 attribute_spec.handler. */
6814 static tree
6815 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6816 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6817 {
6818 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6819 {
6820 warning (OPT_Wattributes, "%qE attribute ignored", name);
6821 *no_add_attrs = true;
6822 }
6823 return NULL_TREE;
6824 }
6825
6826 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6827 /* Handle the "notshared" attribute. This attribute is another way of
6828 requesting hidden visibility. ARM's compiler supports
6829 "__declspec(notshared)"; we support the same thing via an
6830 attribute. */
6831
6832 static tree
6833 arm_handle_notshared_attribute (tree *node,
6834 tree name ATTRIBUTE_UNUSED,
6835 tree args ATTRIBUTE_UNUSED,
6836 int flags ATTRIBUTE_UNUSED,
6837 bool *no_add_attrs)
6838 {
6839 tree decl = TYPE_NAME (*node);
6840
6841 if (decl)
6842 {
6843 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6844 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6845 *no_add_attrs = false;
6846 }
6847 return NULL_TREE;
6848 }
6849 #endif
6850
6851 /* This function returns true if a function with declaration FNDECL and type
6852    FNTYPE uses the stack to pass arguments or to return its value, and false
6853 otherwise. This is used for functions with the attributes
6854 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6855 diagnostic messages if the stack is used. NAME is the name of the attribute
6856 used. */
6857
6858 static bool
6859 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6860 {
6861 function_args_iterator args_iter;
6862 CUMULATIVE_ARGS args_so_far_v;
6863 cumulative_args_t args_so_far;
6864 bool first_param = true;
6865 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6866
6867 /* Error out if any argument is passed on the stack. */
6868 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6869 args_so_far = pack_cumulative_args (&args_so_far_v);
6870 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6871 {
6872 rtx arg_rtx;
6873 machine_mode arg_mode = TYPE_MODE (arg_type);
6874
6875 prev_arg_type = arg_type;
6876 if (VOID_TYPE_P (arg_type))
6877 continue;
6878
6879 if (!first_param)
6880 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6881 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6882 if (!arg_rtx
6883 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6884 {
6885 error ("%qE attribute not available to functions with arguments "
6886 "passed on the stack", name);
6887 return true;
6888 }
6889 first_param = false;
6890 }
6891
6892 /* Error out for variadic functions since we cannot control how many
6893 arguments will be passed and thus stack could be used. stdarg_p () is not
6894 used for the checking to avoid browsing arguments twice. */
6895 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6896 {
6897 error ("%qE attribute not available to functions with variable number "
6898 "of arguments", name);
6899 return true;
6900 }
6901
6902 /* Error out if return value is passed on the stack. */
6903 ret_type = TREE_TYPE (fntype);
6904 if (arm_return_in_memory (ret_type, fntype))
6905 {
6906 error ("%qE attribute not available to functions that return a value on "
6907 "the stack", name);
6908 return true;
6909 }
6910 return false;
6911 }
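
/* Example, for illustration only: under the AAPCS the first four integer
   arguments are passed in r0-r3, so with -mcmse a declaration such as

     int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int, int);

   passes its fifth argument on the stack and is rejected by the check
   above.  */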
6912
6913 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6914 function will check whether the attribute is allowed here and will add the
6915 attribute to the function declaration tree or otherwise issue a warning. */
6916
6917 static tree
6918 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6919 tree /* args */,
6920 int /* flags */,
6921 bool *no_add_attrs)
6922 {
6923 tree fndecl;
6924
6925 if (!use_cmse)
6926 {
6927 *no_add_attrs = true;
6928 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6929 name);
6930 return NULL_TREE;
6931 }
6932
6933 /* Ignore attribute for function types. */
6934 if (TREE_CODE (*node) != FUNCTION_DECL)
6935 {
6936 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6937 name);
6938 *no_add_attrs = true;
6939 return NULL_TREE;
6940 }
6941
6942 fndecl = *node;
6943
6944 /* Warn for static linkage functions. */
6945 if (!TREE_PUBLIC (fndecl))
6946 {
6947 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6948 "with static linkage", name);
6949 *no_add_attrs = true;
6950 return NULL_TREE;
6951 }
6952
6953 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6954 TREE_TYPE (fndecl));
6955 return NULL_TREE;
6956 }
6957
6958
6959 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6960 function will check whether the attribute is allowed here and will add the
6961 attribute to the function type tree or otherwise issue a diagnostic. The
6962 reason we check this at declaration time is to only allow the use of the
6963 attribute with declarations of function pointers and not function
6964 declarations. This function checks NODE is of the expected type and issues
6965 diagnostics otherwise using NAME. If it is not of the expected type
6966 *NO_ADD_ATTRS will be set to true. */
6967
6968 static tree
6969 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6970 tree /* args */,
6971 int /* flags */,
6972 bool *no_add_attrs)
6973 {
6974 tree decl = NULL_TREE, fntype = NULL_TREE;
6975 tree type;
6976
6977 if (!use_cmse)
6978 {
6979 *no_add_attrs = true;
6980 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6981 name);
6982 return NULL_TREE;
6983 }
6984
6985 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6986 {
6987 decl = *node;
6988 fntype = TREE_TYPE (decl);
6989 }
6990
6991 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6992 fntype = TREE_TYPE (fntype);
6993
6994 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6995 {
6996 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6997 "function pointer", name);
6998 *no_add_attrs = true;
6999 return NULL_TREE;
7000 }
7001
7002 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7003
7004 if (*no_add_attrs)
7005 return NULL_TREE;
7006
7007 /* Prevent trees from being shared among function types with and without
7008 the cmse_nonsecure_call attribute. */
7009 type = TREE_TYPE (decl);
7010
7011 type = build_distinct_type_copy (type);
7012 TREE_TYPE (decl) = type;
7013 fntype = type;
7014
7015 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7016 {
7017 type = fntype;
7018 fntype = TREE_TYPE (fntype);
7019 fntype = build_distinct_type_copy (fntype);
7020 TREE_TYPE (type) = fntype;
7021 }
7022
7023 /* Construct a type attribute and add it to the function type. */
7024 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7025 TYPE_ATTRIBUTES (fntype));
7026 TYPE_ATTRIBUTES (fntype) = attrs;
7027 return NULL_TREE;
7028 }
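
/* Example, for illustration only: the attribute is typically placed on a
   function-pointer type, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);
     nsfunc *callback;

   Applying it directly to an ordinary function declaration is diagnosed
   above because the node is not a function-pointer declaration.  */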
7029
7030 /* Return 0 if the attributes for two types are incompatible, 1 if they
7031 are compatible, and 2 if they are nearly compatible (which causes a
7032 warning to be generated). */
7033 static int
7034 arm_comp_type_attributes (const_tree type1, const_tree type2)
7035 {
7036 int l1, l2, s1, s2;
7037
7038 /* Check for mismatch of non-default calling convention. */
7039 if (TREE_CODE (type1) != FUNCTION_TYPE)
7040 return 1;
7041
7042 /* Check for mismatched call attributes. */
7043 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7044 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7045 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7046 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7047
7048 /* Only bother to check if an attribute is defined. */
7049 if (l1 | l2 | s1 | s2)
7050 {
7051 /* If one type has an attribute, the other must have the same attribute. */
7052 if ((l1 != l2) || (s1 != s2))
7053 return 0;
7054
7055 /* Disallow mixed attributes. */
7056 if ((l1 & s2) || (l2 & s1))
7057 return 0;
7058 }
7059
7060 /* Check for mismatched ISR attribute. */
7061 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7062 if (! l1)
7063 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7065 if (! l2)
7066 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7067 if (l1 != l2)
7068 return 0;
7069
7070 l1 = lookup_attribute ("cmse_nonsecure_call",
7071 TYPE_ATTRIBUTES (type1)) != NULL;
7072 l2 = lookup_attribute ("cmse_nonsecure_call",
7073 TYPE_ATTRIBUTES (type2)) != NULL;
7074
7075 if (l1 != l2)
7076 return 0;
7077
7078 return 1;
7079 }
7080
7081 /* Assigns default attributes to newly defined type. This is used to
7082 set short_call/long_call attributes for function types of
7083 functions defined inside corresponding #pragma scopes. */
7084 static void
7085 arm_set_default_type_attributes (tree type)
7086 {
7087 /* Add __attribute__ ((long_call)) to all functions, when
7088 inside #pragma long_calls or __attribute__ ((short_call)),
7089 when inside #pragma no_long_calls. */
7090 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7091 {
7092 tree type_attr_list, attr_name;
7093 type_attr_list = TYPE_ATTRIBUTES (type);
7094
7095 if (arm_pragma_long_calls == LONG)
7096 attr_name = get_identifier ("long_call");
7097 else if (arm_pragma_long_calls == SHORT)
7098 attr_name = get_identifier ("short_call");
7099 else
7100 return;
7101
7102 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7103 TYPE_ATTRIBUTES (type) = type_attr_list;
7104 }
7105 }
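
/* Example, for illustration only: the pragma scopes referred to above
   look like

     #pragma long_calls
     void far_func (void);
     #pragma long_calls_off

   where every function type declared inside the scope receives the
   long_call attribute; #pragma no_long_calls attaches short_call
   instead.  */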
7106 \f
7107 /* Return true if DECL is known to be linked into section SECTION. */
7108
7109 static bool
7110 arm_function_in_section_p (tree decl, section *section)
7111 {
7112 /* We can only be certain about the prevailing symbol definition. */
7113 if (!decl_binds_to_current_def_p (decl))
7114 return false;
7115
7116 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7117 if (!DECL_SECTION_NAME (decl))
7118 {
7119 /* Make sure that we will not create a unique section for DECL. */
7120 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7121 return false;
7122 }
7123
7124 return function_section (decl) == section;
7125 }
7126
7127 /* Return nonzero if a 32-bit "long_call" should be generated for
7128 a call from the current function to DECL. We generate a long_call
7129 if the function:
7130
7131 a. has an __attribute__ ((long_call))
7132 or b. is within the scope of a #pragma long_calls
7133 or c. the -mlong-calls command line switch has been specified
7134
7135 However we do not generate a long call if the function:
7136
7137 d. has an __attribute__ ((short_call))
7138 or e. is inside the scope of a #pragma no_long_calls
7139 or f. is defined in the same section as the current function. */
7140
7141 bool
7142 arm_is_long_call_p (tree decl)
7143 {
7144 tree attrs;
7145
7146 if (!decl)
7147 return TARGET_LONG_CALLS;
7148
7149 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7150 if (lookup_attribute ("short_call", attrs))
7151 return false;
7152
7153 /* For "f", be conservative, and only cater for cases in which the
7154 whole of the current function is placed in the same section. */
7155 if (!flag_reorder_blocks_and_partition
7156 && TREE_CODE (decl) == FUNCTION_DECL
7157 && arm_function_in_section_p (decl, current_function_section ()))
7158 return false;
7159
7160 if (lookup_attribute ("long_call", attrs))
7161 return true;
7162
7163 return TARGET_LONG_CALLS;
7164 }
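
/* Example, for illustration only: the same decision can be driven per
   function from the source, e.g.

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   while the -mlong-calls command-line option sets the TARGET_LONG_CALLS
   default used above.  */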
7165
7166 /* Return nonzero if it is ok to make a tail-call to DECL. */
7167 static bool
7168 arm_function_ok_for_sibcall (tree decl, tree exp)
7169 {
7170 unsigned long func_type;
7171
7172 if (cfun->machine->sibcall_blocked)
7173 return false;
7174
7175 /* Never tailcall something if we are generating code for Thumb-1. */
7176 if (TARGET_THUMB1)
7177 return false;
7178
7179 /* The PIC register is live on entry to VxWorks PLT entries, so we
7180 must make the call before restoring the PIC register. */
7181 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7182 return false;
7183
7184 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7185 may be used both as the target of the call and as the base register for
7186 restoring the VFP registers. */
7187 if (TARGET_APCS_FRAME && TARGET_ARM
7188 && TARGET_HARD_FLOAT
7189 && decl && arm_is_long_call_p (decl))
7190 return false;
7191
7192 /* If we are interworking and the function is not declared static
7193 then we can't tail-call it unless we know that it exists in this
7194 compilation unit (since it might be a Thumb routine). */
7195 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7196 && !TREE_ASM_WRITTEN (decl))
7197 return false;
7198
7199 func_type = arm_current_func_type ();
7200 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7201 if (IS_INTERRUPT (func_type))
7202 return false;
7203
7204 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7205 generated for entry functions themselves. */
7206 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7207 return false;
7208
7209 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7210 this would complicate matters for later code generation. */
7211 if (TREE_CODE (exp) == CALL_EXPR)
7212 {
7213 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7214 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7215 return false;
7216 }
7217
7218 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7219 {
7220 /* Check that the return value locations are the same. For
7221 example that we aren't returning a value from the sibling in
7222 a VFP register but then need to transfer it to a core
7223 register. */
7224 rtx a, b;
7225 tree decl_or_type = decl;
7226
7227 /* If it is an indirect function pointer, get the function type. */
7228 if (!decl)
7229 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7230
7231 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7232 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7233 cfun->decl, false);
7234 if (!rtx_equal_p (a, b))
7235 return false;
7236 }
7237
7238 /* Never tailcall if function may be called with a misaligned SP. */
7239 if (IS_STACKALIGN (func_type))
7240 return false;
7241
7242 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7243 references should become a NOP. Don't convert such calls into
7244 sibling calls. */
7245 if (TARGET_AAPCS_BASED
7246 && arm_abi == ARM_ABI_AAPCS
7247 && decl
7248 && DECL_WEAK (decl))
7249 return false;
7250
7251 /* We cannot do a tailcall for an indirect call by descriptor if all the
7252 argument registers are used because the only register left to load the
7253 address is IP and it will already contain the static chain. */
7254 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7255 {
7256 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7257 CUMULATIVE_ARGS cum;
7258 cumulative_args_t cum_v;
7259
7260 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7261 cum_v = pack_cumulative_args (&cum);
7262
7263 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7264 {
7265 tree type = TREE_VALUE (t);
7266 if (!VOID_TYPE_P (type))
7267 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7268 }
7269
7270 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7271 return false;
7272 }
7273
7274 /* Everything else is ok. */
7275 return true;
7276 }
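
/* Example, for illustration only: a call in tail position such as

     int callee (int);
     int caller (int x) { return callee (x + 1); }

   can be emitted as a sibling call (a direct branch) when all of the
   checks above pass, e.g. when caller and callee return their values in
   the same location.  */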
7277
7278 \f
7279 /* Addressing mode support functions. */
7280
7281 /* Return nonzero if X is a legitimate immediate operand when compiling
7282 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7283 int
7284 legitimate_pic_operand_p (rtx x)
7285 {
7286 if (GET_CODE (x) == SYMBOL_REF
7287 || (GET_CODE (x) == CONST
7288 && GET_CODE (XEXP (x, 0)) == PLUS
7289 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7290 return 0;
7291
7292 return 1;
7293 }
7294
7295 /* Record that the current function needs a PIC register. Initialize
7296 cfun->machine->pic_reg if we have not already done so. */
7297
7298 static void
7299 require_pic_register (void)
7300 {
7301 /* A lot of the logic here is made obscure by the fact that this
7302 routine gets called as part of the rtx cost estimation process.
7303 We don't want those calls to affect any assumptions about the real
7304 function; and further, we can't call entry_of_function() until we
7305 start the real expansion process. */
7306 if (!crtl->uses_pic_offset_table)
7307 {
7308 gcc_assert (can_create_pseudo_p ());
7309 if (arm_pic_register != INVALID_REGNUM
7310 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7311 {
7312 if (!cfun->machine->pic_reg)
7313 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7314
7315 /* Play games to avoid marking the function as needing pic
7316 if we are being called as part of the cost-estimation
7317 process. */
7318 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7319 crtl->uses_pic_offset_table = 1;
7320 }
7321 else
7322 {
7323 rtx_insn *seq, *insn;
7324
7325 if (!cfun->machine->pic_reg)
7326 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7327
7328 /* Play games to avoid marking the function as needing pic
7329 if we are being called as part of the cost-estimation
7330 process. */
7331 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7332 {
7333 crtl->uses_pic_offset_table = 1;
7334 start_sequence ();
7335
7336 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7337 && arm_pic_register > LAST_LO_REGNUM)
7338 emit_move_insn (cfun->machine->pic_reg,
7339 gen_rtx_REG (Pmode, arm_pic_register));
7340 else
7341 arm_load_pic_register (0UL);
7342
7343 seq = get_insns ();
7344 end_sequence ();
7345
7346 for (insn = seq; insn; insn = NEXT_INSN (insn))
7347 if (INSN_P (insn))
7348 INSN_LOCATION (insn) = prologue_location;
7349
7350 /* We can be called during expansion of PHI nodes, where
7351 we can't yet emit instructions directly in the final
7352 insn stream. Queue the insns on the entry edge, they will
7353 be committed after everything else is expanded. */
7354 insert_insn_on_edge (seq,
7355 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7356 }
7357 }
7358 }
7359 }
7360
7361 rtx
7362 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7363 {
7364 if (GET_CODE (orig) == SYMBOL_REF
7365 || GET_CODE (orig) == LABEL_REF)
7366 {
7367 if (reg == 0)
7368 {
7369 gcc_assert (can_create_pseudo_p ());
7370 reg = gen_reg_rtx (Pmode);
7371 }
7372
7373 /* VxWorks does not impose a fixed gap between segments; the run-time
7374 gap can be different from the object-file gap. We therefore can't
7375 use GOTOFF unless we are absolutely sure that the symbol is in the
7376 same segment as the GOT. Unfortunately, the flexibility of linker
7377 scripts means that we can't be sure of that in general, so assume
7378 that GOTOFF is never valid on VxWorks. */
7379 /* References to weak symbols cannot be resolved locally: they
7380 may be overridden by a non-weak definition at link time. */
7381 rtx_insn *insn;
7382 if ((GET_CODE (orig) == LABEL_REF
7383 || (GET_CODE (orig) == SYMBOL_REF
7384 && SYMBOL_REF_LOCAL_P (orig)
7385 && (SYMBOL_REF_DECL (orig)
7386 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7387 && NEED_GOT_RELOC
7388 && arm_pic_data_is_text_relative)
7389 insn = arm_pic_static_addr (orig, reg);
7390 else
7391 {
7392 rtx pat;
7393 rtx mem;
7394
7395 /* If this function doesn't have a pic register, create one now. */
7396 require_pic_register ();
7397
7398 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7399
7400 /* Make the MEM as close to a constant as possible. */
7401 mem = SET_SRC (pat);
7402 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7403 MEM_READONLY_P (mem) = 1;
7404 MEM_NOTRAP_P (mem) = 1;
7405
7406 insn = emit_insn (pat);
7407 }
7408
7409 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7410 by loop. */
7411 set_unique_reg_note (insn, REG_EQUAL, orig);
7412
7413 return reg;
7414 }
7415 else if (GET_CODE (orig) == CONST)
7416 {
7417 rtx base, offset;
7418
7419 if (GET_CODE (XEXP (orig, 0)) == PLUS
7420 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7421 return orig;
7422
7423 /* Handle the case where we have: const (UNSPEC_TLS). */
7424 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7425 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7426 return orig;
7427
7428 /* Handle the case where we have:
7429 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7430 CONST_INT. */
7431 if (GET_CODE (XEXP (orig, 0)) == PLUS
7432 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7433 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7434 {
7435 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7436 return orig;
7437 }
7438
7439 if (reg == 0)
7440 {
7441 gcc_assert (can_create_pseudo_p ());
7442 reg = gen_reg_rtx (Pmode);
7443 }
7444
7445 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7446
7447 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7448 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7449 base == reg ? 0 : reg);
7450
7451 if (CONST_INT_P (offset))
7452 {
7453 /* The base register doesn't really matter; we only want to
7454 test the index for the appropriate mode. */
7455 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7456 {
7457 gcc_assert (can_create_pseudo_p ());
7458 offset = force_reg (Pmode, offset);
7459 }
7460
7461 if (CONST_INT_P (offset))
7462 return plus_constant (Pmode, base, INTVAL (offset));
7463 }
7464
7465 if (GET_MODE_SIZE (mode) > 4
7466 && (GET_MODE_CLASS (mode) == MODE_INT
7467 || TARGET_SOFT_FLOAT))
7468 {
7469 emit_insn (gen_addsi3 (reg, base, offset));
7470 return reg;
7471 }
7472
7473 return gen_rtx_PLUS (Pmode, base, offset);
7474 }
7475
7476 return orig;
7477 }
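
/* Example, for illustration only: when compiling with -fPIC, a reference
   to a preemptible global such as

     extern int counter;
     int get (void) { return counter; }

   is rewritten above so that the address of counter is loaded from the
   GOT through the PIC register, whereas label references and known-local
   symbols can use a cheaper PC-relative form.  */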
7478
7479
7480 /* Find a spare register to use during the prolog of a function. */
7481
7482 static int
7483 thumb_find_work_register (unsigned long pushed_regs_mask)
7484 {
7485 int reg;
7486
7487 /* Check the argument registers first as these are call-used. The
7488 register allocation order means that sometimes r3 might be used
7489 but earlier argument registers might not, so check them all. */
7490 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7491 if (!df_regs_ever_live_p (reg))
7492 return reg;
7493
7494 /* Before going on to check the call-saved registers we can try a couple
7495 more ways of deducing that r3 is available. The first is when we are
7496 pushing anonymous arguments onto the stack and we have fewer than 4
7497 registers' worth of fixed arguments (*). In this case r3 will be part of
7498 the variable argument list and so we can be sure that it will be
7499 pushed right at the start of the function. Hence it will be available
7500 for the rest of the prologue.
7501 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7502 if (cfun->machine->uses_anonymous_args
7503 && crtl->args.pretend_args_size > 0)
7504 return LAST_ARG_REGNUM;
7505
7506 /* The other case is when we have fixed arguments but fewer than 4 registers'
7507 worth. In this case r3 might be used in the body of the function, but
7508 it is not being used to convey an argument into the function. In theory
7509 we could just check crtl->args.size to see how many bytes are
7510 being passed in argument registers, but it seems that it is unreliable.
7511 Sometimes it will have the value 0 when in fact arguments are being
7512 passed. (See testcase execute/20021111-1.c for an example). So we also
7513 check the args_info.nregs field as well. The problem with this field is
7514 that it makes no allowances for arguments that are passed to the
7515 function but which are not used. Hence we could miss an opportunity
7516 when a function has an unused argument in r3. But it is better to be
7517 safe than sorry. */
7518 if (! cfun->machine->uses_anonymous_args
7519 && crtl->args.size >= 0
7520 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7521 && (TARGET_AAPCS_BASED
7522 ? crtl->args.info.aapcs_ncrn < 4
7523 : crtl->args.info.nregs < 4))
7524 return LAST_ARG_REGNUM;
7525
7526 /* Otherwise look for a call-saved register that is going to be pushed. */
7527 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7528 if (pushed_regs_mask & (1 << reg))
7529 return reg;
7530
7531 if (TARGET_THUMB2)
7532 {
7533 /* Thumb-2 can use high regs. */
7534 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7535 if (pushed_regs_mask & (1 << reg))
7536 return reg;
7537 }
7538 /* Something went wrong - thumb_compute_save_reg_mask()
7539 should have arranged for a suitable register to be pushed. */
7540 gcc_unreachable ();
7541 }
7542
7543 static GTY(()) int pic_labelno;
7544
7545 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7546 low register. */
7547
7548 void
7549 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7550 {
7551 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7552
7553 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7554 return;
7555
7556 gcc_assert (flag_pic);
7557
7558 pic_reg = cfun->machine->pic_reg;
7559 if (TARGET_VXWORKS_RTP)
7560 {
7561 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7562 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7563 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7564
7565 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7566
7567 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7568 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7569 }
7570 else
7571 {
7572 /* We use an UNSPEC rather than a LABEL_REF because this label
7573 never appears in the code stream. */
7574
7575 labelno = GEN_INT (pic_labelno++);
7576 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7577 l1 = gen_rtx_CONST (VOIDmode, l1);
7578
7579 /* On the ARM the PC register contains 'dot + 8' at the time of the
7580 addition, on the Thumb it is 'dot + 4'. */
7581 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7582 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7583 UNSPEC_GOTSYM_OFF);
7584 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7585
7586 if (TARGET_32BIT)
7587 {
7588 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7589 }
7590 else /* TARGET_THUMB1 */
7591 {
7592 if (arm_pic_register != INVALID_REGNUM
7593 && REGNO (pic_reg) > LAST_LO_REGNUM)
7594 {
7595 /* We will have pushed the pic register, so we should always be
7596 able to find a work register. */
7597 pic_tmp = gen_rtx_REG (SImode,
7598 thumb_find_work_register (saved_regs));
7599 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7600 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7601 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7602 }
7603 else if (arm_pic_register != INVALID_REGNUM
7604 && arm_pic_register > LAST_LO_REGNUM
7605 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7606 {
7607 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7608 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7609 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7610 }
7611 else
7612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7613 }
7614 }
7615
7616 /* Need to emit this whether or not we obey regdecls,
7617 since setjmp/longjmp can cause life info to screw up. */
7618 emit_use (pic_reg);
7619 }
7620
7621 /* Generate code to load the address of a static var when flag_pic is set. */
7622 static rtx_insn *
7623 arm_pic_static_addr (rtx orig, rtx reg)
7624 {
7625 rtx l1, labelno, offset_rtx;
7626
7627 gcc_assert (flag_pic);
7628
7629 /* We use an UNSPEC rather than a LABEL_REF because this label
7630 never appears in the code stream. */
7631 labelno = GEN_INT (pic_labelno++);
7632 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7633 l1 = gen_rtx_CONST (VOIDmode, l1);
7634
7635 /* On the ARM the PC register contains 'dot + 8' at the time of the
7636 addition, on the Thumb it is 'dot + 4'. */
7637 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7638 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7639 UNSPEC_SYMBOL_OFFSET);
7640 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7641
7642 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7643 }
7644
7645 /* Return nonzero if X is valid as an ARM state addressing register. */
7646 static int
7647 arm_address_register_rtx_p (rtx x, int strict_p)
7648 {
7649 int regno;
7650
7651 if (!REG_P (x))
7652 return 0;
7653
7654 regno = REGNO (x);
7655
7656 if (strict_p)
7657 return ARM_REGNO_OK_FOR_BASE_P (regno);
7658
7659 return (regno <= LAST_ARM_REGNUM
7660 || regno >= FIRST_PSEUDO_REGISTER
7661 || regno == FRAME_POINTER_REGNUM
7662 || regno == ARG_POINTER_REGNUM);
7663 }
7664
7665 /* Return TRUE if this rtx is the difference of a symbol and a label,
7666 and will reduce to a PC-relative relocation in the object file.
7667 Expressions like this can be left alone when generating PIC, rather
7668 than forced through the GOT. */
7669 static int
7670 pcrel_constant_p (rtx x)
7671 {
7672 if (GET_CODE (x) == MINUS)
7673 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7674
7675 return FALSE;
7676 }
7677
7678 /* Return true if X will surely end up in an index register after next
7679 splitting pass. */
7680 static bool
7681 will_be_in_index_register (const_rtx x)
7682 {
7683 /* arm.md: calculate_pic_address will split this into a register. */
7684 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7685 }
7686
7687 /* Return nonzero if X is a valid ARM state address operand. */
7688 int
7689 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7690 int strict_p)
7691 {
7692 bool use_ldrd;
7693 enum rtx_code code = GET_CODE (x);
7694
7695 if (arm_address_register_rtx_p (x, strict_p))
7696 return 1;
7697
7698 use_ldrd = (TARGET_LDRD
7699 && (mode == DImode || mode == DFmode));
7700
7701 if (code == POST_INC || code == PRE_DEC
7702 || ((code == PRE_INC || code == POST_DEC)
7703 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7704 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7705
7706 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7707 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7708 && GET_CODE (XEXP (x, 1)) == PLUS
7709 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7710 {
7711 rtx addend = XEXP (XEXP (x, 1), 1);
7712
7713 /* Don't allow ldrd post-increment by register because it's hard
7714 to fix up invalid register choices. */
7715 if (use_ldrd
7716 && GET_CODE (x) == POST_MODIFY
7717 && REG_P (addend))
7718 return 0;
7719
7720 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7721 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7722 }
7723
7724 /* After reload constants split into minipools will have addresses
7725 from a LABEL_REF. */
7726 else if (reload_completed
7727 && (code == LABEL_REF
7728 || (code == CONST
7729 && GET_CODE (XEXP (x, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7731 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7732 return 1;
7733
7734 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7735 return 0;
7736
7737 else if (code == PLUS)
7738 {
7739 rtx xop0 = XEXP (x, 0);
7740 rtx xop1 = XEXP (x, 1);
7741
7742 return ((arm_address_register_rtx_p (xop0, strict_p)
7743 && ((CONST_INT_P (xop1)
7744 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7745 || (!strict_p && will_be_in_index_register (xop1))))
7746 || (arm_address_register_rtx_p (xop1, strict_p)
7747 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7748 }
7749
7750 #if 0
7751 /* Reload currently can't handle MINUS, so disable this for now */
7752 else if (GET_CODE (x) == MINUS)
7753 {
7754 rtx xop0 = XEXP (x, 0);
7755 rtx xop1 = XEXP (x, 1);
7756
7757 return (arm_address_register_rtx_p (xop0, strict_p)
7758 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7759 }
7760 #endif
7761
7762 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7763 && code == SYMBOL_REF
7764 && CONSTANT_POOL_ADDRESS_P (x)
7765 && ! (flag_pic
7766 && symbol_mentioned_p (get_pool_constant (x))
7767 && ! pcrel_constant_p (get_pool_constant (x))))
7768 return 1;
7769
7770 return 0;
7771 }
7772
7773 /* Return true if we can avoid creating a constant pool entry for x. */
7774 static bool
7775 can_avoid_literal_pool_for_label_p (rtx x)
7776 {
7777 /* Normally we can assign constant values to target registers without
7778 the help of the constant pool. But there are cases where we have to use
7779 the constant pool, for example:
7780 1) assigning a label to a register.
7781 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7782
7783 A constant pool access of the form:
7784 (set (reg r0) (mem (symbol_ref (".LC0"))))
7785 will cause the use of the literal pool (later, in arm_reorg).
7786 So here we mark such a form as invalid; the compiler
7787 will then adjust it into:
7788 (set (reg r0) (symbol_ref (".LC0")))
7789 (set (reg r0) (mem (reg r0))).
7790 No extra register is required, and (mem (reg r0)) won't cause the use
7791 of the literal pool. */
7792 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7793 && CONSTANT_POOL_ADDRESS_P (x))
7794 return 1;
7795 return 0;
7796 }
7797
7798
7799 /* Return nonzero if X is a valid Thumb-2 address operand. */
7800 static int
7801 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7802 {
7803 bool use_ldrd;
7804 enum rtx_code code = GET_CODE (x);
7805
7806 if (arm_address_register_rtx_p (x, strict_p))
7807 return 1;
7808
7809 use_ldrd = (TARGET_LDRD
7810 && (mode == DImode || mode == DFmode));
7811
7812 if (code == POST_INC || code == PRE_DEC
7813 || ((code == PRE_INC || code == POST_DEC)
7814 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7815 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7816
7817 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7818 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7819 && GET_CODE (XEXP (x, 1)) == PLUS
7820 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7821 {
7822 /* Thumb-2 only has autoincrement by constant. */
7823 rtx addend = XEXP (XEXP (x, 1), 1);
7824 HOST_WIDE_INT offset;
7825
7826 if (!CONST_INT_P (addend))
7827 return 0;
7828
7829 offset = INTVAL(addend);
7830 if (GET_MODE_SIZE (mode) <= 4)
7831 return (offset > -256 && offset < 256);
7832
7833 return (use_ldrd && offset > -1024 && offset < 1024
7834 && (offset & 3) == 0);
7835 }
7836
7837 /* After reload constants split into minipools will have addresses
7838 from a LABEL_REF. */
7839 else if (reload_completed
7840 && (code == LABEL_REF
7841 || (code == CONST
7842 && GET_CODE (XEXP (x, 0)) == PLUS
7843 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7844 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7845 return 1;
7846
7847 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7848 return 0;
7849
7850 else if (code == PLUS)
7851 {
7852 rtx xop0 = XEXP (x, 0);
7853 rtx xop1 = XEXP (x, 1);
7854
7855 return ((arm_address_register_rtx_p (xop0, strict_p)
7856 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7857 || (!strict_p && will_be_in_index_register (xop1))))
7858 || (arm_address_register_rtx_p (xop1, strict_p)
7859 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7860 }
7861
7862 else if (can_avoid_literal_pool_for_label_p (x))
7863 return 0;
7864
7865 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7866 && code == SYMBOL_REF
7867 && CONSTANT_POOL_ADDRESS_P (x)
7868 && ! (flag_pic
7869 && symbol_mentioned_p (get_pool_constant (x))
7870 && ! pcrel_constant_p (get_pool_constant (x))))
7871 return 1;
7872
7873 return 0;
7874 }
7875
7876 /* Return nonzero if INDEX is valid for an address index operand in
7877 ARM state. */
7878 static int
7879 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7880 int strict_p)
7881 {
7882 HOST_WIDE_INT range;
7883 enum rtx_code code = GET_CODE (index);
7884
7885 /* Standard coprocessor addressing modes. */
7886 if (TARGET_HARD_FLOAT
7887 && (mode == SFmode || mode == DFmode))
7888 return (code == CONST_INT && INTVAL (index) < 1024
7889 && INTVAL (index) > -1024
7890 && (INTVAL (index) & 3) == 0);
7891
7892 /* For quad modes, we restrict the constant offset to be slightly less
7893 than what the instruction format permits. We do this because for
7894 quad mode moves, we will actually decompose them into two separate
7895 double-mode reads or writes. INDEX must therefore be a valid
7896 (double-mode) offset and so should INDEX+8. */
7897 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7898 return (code == CONST_INT
7899 && INTVAL (index) < 1016
7900 && INTVAL (index) > -1024
7901 && (INTVAL (index) & 3) == 0);
7902
7903 /* We have no such constraint on double mode offsets, so we permit the
7904 full range of the instruction format. */
7905 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7906 return (code == CONST_INT
7907 && INTVAL (index) < 1024
7908 && INTVAL (index) > -1024
7909 && (INTVAL (index) & 3) == 0);
7910
7911 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7912 return (code == CONST_INT
7913 && INTVAL (index) < 1024
7914 && INTVAL (index) > -1024
7915 && (INTVAL (index) & 3) == 0);
7916
7917 if (arm_address_register_rtx_p (index, strict_p)
7918 && (GET_MODE_SIZE (mode) <= 4))
7919 return 1;
7920
7921 if (mode == DImode || mode == DFmode)
7922 {
7923 if (code == CONST_INT)
7924 {
7925 HOST_WIDE_INT val = INTVAL (index);
7926
7927 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7928 If vldr is selected it uses arm_coproc_mem_operand. */
7929 if (TARGET_LDRD)
7930 return val > -256 && val < 256;
7931 else
7932 return val > -4096 && val < 4092;
7933 }
7934
7935 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7936 }
7937
7938 if (GET_MODE_SIZE (mode) <= 4
7939 && ! (arm_arch4
7940 && (mode == HImode
7941 || mode == HFmode
7942 || (mode == QImode && outer == SIGN_EXTEND))))
7943 {
7944 if (code == MULT)
7945 {
7946 rtx xiop0 = XEXP (index, 0);
7947 rtx xiop1 = XEXP (index, 1);
7948
7949 return ((arm_address_register_rtx_p (xiop0, strict_p)
7950 && power_of_two_operand (xiop1, SImode))
7951 || (arm_address_register_rtx_p (xiop1, strict_p)
7952 && power_of_two_operand (xiop0, SImode)));
7953 }
7954 else if (code == LSHIFTRT || code == ASHIFTRT
7955 || code == ASHIFT || code == ROTATERT)
7956 {
7957 rtx op = XEXP (index, 1);
7958
7959 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7960 && CONST_INT_P (op)
7961 && INTVAL (op) > 0
7962 && INTVAL (op) <= 31);
7963 }
7964 }
7965
7966 /* For ARM v4 we may be doing a sign-extend operation during the
7967 load. */
7968 if (arm_arch4)
7969 {
7970 if (mode == HImode
7971 || mode == HFmode
7972 || (outer == SIGN_EXTEND && mode == QImode))
7973 range = 256;
7974 else
7975 range = 4096;
7976 }
7977 else
7978 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7979
7980 return (code == CONST_INT
7981 && INTVAL (index) < range
7982 && INTVAL (index) > -range);
7983 }
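
/* Examples, for illustration only (register choices are arbitrary): for a
   word load in ARM state the checks above correspond to addressing forms
   such as

     ldr r0, [r1, #4095]
     ldr r0, [r1, r2, lsl #3]

   while halfword and signed-byte accesses on ARMv4 and later are limited
   to a +/-255 immediate offset.  */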
7984
7985 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7986 index operand. i.e. 1, 2, 4 or 8. */
7987 static bool
7988 thumb2_index_mul_operand (rtx op)
7989 {
7990 HOST_WIDE_INT val;
7991
7992 if (!CONST_INT_P (op))
7993 return false;
7994
7995 val = INTVAL(op);
7996 return (val == 1 || val == 2 || val == 4 || val == 8);
7997 }
7998
7999 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8000 static int
8001 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8002 {
8003 enum rtx_code code = GET_CODE (index);
8004
8005 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8006 /* Standard coprocessor addressing modes. */
8007 if (TARGET_HARD_FLOAT
8008 && (mode == SFmode || mode == DFmode))
8009 return (code == CONST_INT && INTVAL (index) < 1024
8010 /* Thumb-2 allows only a > -256 index range for its core register
8011 load/stores. Since we allow SF/DF in core registers, we have
8012 to use the intersection between -256~4096 (core) and -1024~1024
8013 (coprocessor). */
8014 && INTVAL (index) > -256
8015 && (INTVAL (index) & 3) == 0);
8016
8017 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8018 {
8019 /* For DImode assume values will usually live in core regs
8020 and only allow LDRD addressing modes. */
8021 if (!TARGET_LDRD || mode != DImode)
8022 return (code == CONST_INT
8023 && INTVAL (index) < 1024
8024 && INTVAL (index) > -1024
8025 && (INTVAL (index) & 3) == 0);
8026 }
8027
8028 /* For quad modes, we restrict the constant offset to be slightly less
8029 than what the instruction format permits. We do this because for
8030 quad mode moves, we will actually decompose them into two separate
8031 double-mode reads or writes. INDEX must therefore be a valid
8032 (double-mode) offset and so should INDEX+8. */
8033 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8034 return (code == CONST_INT
8035 && INTVAL (index) < 1016
8036 && INTVAL (index) > -1024
8037 && (INTVAL (index) & 3) == 0);
8038
8039 /* We have no such constraint on double mode offsets, so we permit the
8040 full range of the instruction format. */
8041 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8046
8047 if (arm_address_register_rtx_p (index, strict_p)
8048 && (GET_MODE_SIZE (mode) <= 4))
8049 return 1;
8050
8051 if (mode == DImode || mode == DFmode)
8052 {
8053 if (code == CONST_INT)
8054 {
8055 HOST_WIDE_INT val = INTVAL (index);
8056 /* Thumb-2 ldrd only has reg+const addressing modes.
8057 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8058 If vldr is selected it uses arm_coproc_mem_operand. */
8059 if (TARGET_LDRD)
8060 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8061 else
8062 return IN_RANGE (val, -255, 4095 - 4);
8063 }
8064 else
8065 return 0;
8066 }
8067
8068 if (code == MULT)
8069 {
8070 rtx xiop0 = XEXP (index, 0);
8071 rtx xiop1 = XEXP (index, 1);
8072
8073 return ((arm_address_register_rtx_p (xiop0, strict_p)
8074 && thumb2_index_mul_operand (xiop1))
8075 || (arm_address_register_rtx_p (xiop1, strict_p)
8076 && thumb2_index_mul_operand (xiop0)));
8077 }
8078 else if (code == ASHIFT)
8079 {
8080 rtx op = XEXP (index, 1);
8081
8082 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8083 && CONST_INT_P (op)
8084 && INTVAL (op) > 0
8085 && INTVAL (op) <= 3);
8086 }
8087
8088 return (code == CONST_INT
8089 && INTVAL (index) < 4096
8090 && INTVAL (index) > -256);
8091 }
8092
8093 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8094 static int
8095 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8096 {
8097 int regno;
8098
8099 if (!REG_P (x))
8100 return 0;
8101
8102 regno = REGNO (x);
8103
8104 if (strict_p)
8105 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8106
8107 return (regno <= LAST_LO_REGNUM
8108 || regno > LAST_VIRTUAL_REGISTER
8109 || regno == FRAME_POINTER_REGNUM
8110 || (GET_MODE_SIZE (mode) >= 4
8111 && (regno == STACK_POINTER_REGNUM
8112 || regno >= FIRST_PSEUDO_REGISTER
8113 || x == hard_frame_pointer_rtx
8114 || x == arg_pointer_rtx)));
8115 }
8116
8117 /* Return nonzero if x is a legitimate index register. This is the case
8118 for any base register that can access a QImode object. */
8119 inline static int
8120 thumb1_index_register_rtx_p (rtx x, int strict_p)
8121 {
8122 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8123 }
8124
8125 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8126
8127 The AP may be eliminated to either the SP or the FP, so we use the
8128 least common denominator, e.g. SImode, and offsets from 0 to 64.
8129
8130 ??? Verify whether the above is the right approach.
8131
8132 ??? Also, the FP may be eliminated to the SP, so perhaps that
8133 needs special handling also.
8134
8135 ??? Look at how the mips16 port solves this problem. It probably uses
8136 better ways to solve some of these problems.
8137
8138 Although it is not incorrect, we don't accept QImode and HImode
8139 addresses based on the frame pointer or arg pointer until the
8140 reload pass starts. This is so that eliminating such addresses
8141 into stack based ones won't produce impossible code. */
8142 int
8143 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8144 {
8145 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8146 return 0;
8147
8148 /* ??? Not clear if this is right. Experiment. */
8149 if (GET_MODE_SIZE (mode) < 4
8150 && !(reload_in_progress || reload_completed)
8151 && (reg_mentioned_p (frame_pointer_rtx, x)
8152 || reg_mentioned_p (arg_pointer_rtx, x)
8153 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8154 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8155 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8156 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8157 return 0;
8158
8159 /* Accept any base register. SP only in SImode or larger. */
8160 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8161 return 1;
8162
8163 /* This is PC relative data before arm_reorg runs. */
8164 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8165 && GET_CODE (x) == SYMBOL_REF
8166 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8167 return 1;
8168
8169 /* This is PC relative data after arm_reorg runs. */
8170 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8171 && reload_completed
8172 && (GET_CODE (x) == LABEL_REF
8173 || (GET_CODE (x) == CONST
8174 && GET_CODE (XEXP (x, 0)) == PLUS
8175 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8176 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8177 return 1;
8178
8179 /* Post-inc indexing only supported for SImode and larger. */
8180 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8181 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8182 return 1;
8183
8184 else if (GET_CODE (x) == PLUS)
8185 {
8186 /* REG+REG address can be any two index registers. */
8187 /* We disallow FRAME+REG addressing since we know that FRAME
8188 will be replaced with STACK, and SP relative addressing only
8189 permits SP+OFFSET. */
8190 if (GET_MODE_SIZE (mode) <= 4
8191 && XEXP (x, 0) != frame_pointer_rtx
8192 && XEXP (x, 1) != frame_pointer_rtx
8193 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8194 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8195 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8196 return 1;
8197
8198 /* REG+const has 5-7 bit offset for non-SP registers. */
8199 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8200 || XEXP (x, 0) == arg_pointer_rtx)
8201 && CONST_INT_P (XEXP (x, 1))
8202 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8203 return 1;
8204
8205 /* REG+const has 10-bit offset for SP, but only SImode and
8206 larger is supported. */
8207 /* ??? Should probably check for DI/DFmode overflow here
8208 just like GO_IF_LEGITIMATE_OFFSET does. */
8209 else if (REG_P (XEXP (x, 0))
8210 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8211 && GET_MODE_SIZE (mode) >= 4
8212 && CONST_INT_P (XEXP (x, 1))
8213 && INTVAL (XEXP (x, 1)) >= 0
8214 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8215 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8216 return 1;
8217
8218 else if (REG_P (XEXP (x, 0))
8219 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8220 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8221 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8222 && REGNO (XEXP (x, 0))
8223 <= LAST_VIRTUAL_POINTER_REGISTER))
8224 && GET_MODE_SIZE (mode) >= 4
8225 && CONST_INT_P (XEXP (x, 1))
8226 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8227 return 1;
8228 }
8229
8230 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8231 && GET_MODE_SIZE (mode) == 4
8232 && GET_CODE (x) == SYMBOL_REF
8233 && CONSTANT_POOL_ADDRESS_P (x)
8234 && ! (flag_pic
8235 && symbol_mentioned_p (get_pool_constant (x))
8236 && ! pcrel_constant_p (get_pool_constant (x))))
8237 return 1;
8238
8239 return 0;
8240 }
8241
8242 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8243 instruction of mode MODE. */
8244 int
8245 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8246 {
8247 switch (GET_MODE_SIZE (mode))
8248 {
8249 case 1:
8250 return val >= 0 && val < 32;
8251
8252 case 2:
8253 return val >= 0 && val < 64 && (val & 1) == 0;
8254
8255 default:
8256 return (val >= 0
8257 && (val + GET_MODE_SIZE (mode)) <= 128
8258 && (val & 3) == 0);
8259 }
8260 }
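
/* Examples, for illustration only (register choices are arbitrary): the
   ranges above match the 16-bit Thumb load/store encodings, whose 5-bit
   immediate is scaled by the access size:

     ldrb r0, [r1, #31]      @ bytes:     0..31
     ldrh r0, [r1, #62]      @ halfwords: 0..62, even
     ldr  r0, [r1, #124]     @ words:     0..124, multiple of 4  */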
8261
8262 bool
8263 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8264 {
8265 if (TARGET_ARM)
8266 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8267 else if (TARGET_THUMB2)
8268 return thumb2_legitimate_address_p (mode, x, strict_p);
8269 else /* if (TARGET_THUMB1) */
8270 return thumb1_legitimate_address_p (mode, x, strict_p);
8271 }
8272
8273 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8274
8275 Given an rtx X being reloaded into a reg required to be
8276 in class CLASS, return the class of reg to actually use.
8277 In general this is just CLASS, but for the Thumb core registers and
8278 immediate constants we prefer a LO_REGS class or a subset. */
8279
8280 static reg_class_t
8281 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8282 {
8283 if (TARGET_32BIT)
8284 return rclass;
8285 else
8286 {
8287 if (rclass == GENERAL_REGS)
8288 return LO_REGS;
8289 else
8290 return rclass;
8291 }
8292 }
8293
8294 /* Build the SYMBOL_REF for __tls_get_addr. */
8295
8296 static GTY(()) rtx tls_get_addr_libfunc;
8297
8298 static rtx
8299 get_tls_get_addr (void)
8300 {
8301 if (!tls_get_addr_libfunc)
8302 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8303 return tls_get_addr_libfunc;
8304 }
8305
8306 rtx
8307 arm_load_tp (rtx target)
8308 {
8309 if (!target)
8310 target = gen_reg_rtx (SImode);
8311
8312 if (TARGET_HARD_TP)
8313 {
8314 /* Can return in any reg. */
8315 emit_insn (gen_load_tp_hard (target));
8316 }
8317 else
8318 {
8319 /* Always returned in r0. Immediately copy the result into a pseudo,
8320 otherwise other uses of r0 (e.g. setting up function arguments) may
8321 clobber the value. */
8322
8323 rtx tmp;
8324
8325 emit_insn (gen_load_tp_soft ());
8326
8327 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8328 emit_move_insn (target, tmp);
8329 }
8330 return target;
8331 }
8332
8333 static rtx
8334 load_tls_operand (rtx x, rtx reg)
8335 {
8336 rtx tmp;
8337
8338 if (reg == NULL_RTX)
8339 reg = gen_reg_rtx (SImode);
8340
8341 tmp = gen_rtx_CONST (SImode, x);
8342
8343 emit_move_insn (reg, tmp);
8344
8345 return reg;
8346 }
8347
8348 static rtx_insn *
8349 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8350 {
8351 rtx label, labelno, sum;
8352
8353 gcc_assert (reloc != TLS_DESCSEQ);
8354 start_sequence ();
8355
8356 labelno = GEN_INT (pic_labelno++);
8357 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8358 label = gen_rtx_CONST (VOIDmode, label);
8359
8360 sum = gen_rtx_UNSPEC (Pmode,
8361 gen_rtvec (4, x, GEN_INT (reloc), label,
8362 GEN_INT (TARGET_ARM ? 8 : 4)),
8363 UNSPEC_TLS);
8364 reg = load_tls_operand (sum, reg);
8365
8366 if (TARGET_ARM)
8367 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8368 else
8369 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8370
8371 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8372 LCT_PURE, /* LCT_CONST? */
8373 Pmode, reg, Pmode);
8374
8375 rtx_insn *insns = get_insns ();
8376 end_sequence ();
8377
8378 return insns;
8379 }
8380
8381 static rtx
8382 arm_tls_descseq_addr (rtx x, rtx reg)
8383 {
8384 rtx labelno = GEN_INT (pic_labelno++);
8385 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8386 rtx sum = gen_rtx_UNSPEC (Pmode,
8387 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8388 gen_rtx_CONST (VOIDmode, label),
8389 GEN_INT (!TARGET_ARM)),
8390 UNSPEC_TLS);
8391 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8392
8393 emit_insn (gen_tlscall (x, labelno));
8394 if (!reg)
8395 reg = gen_reg_rtx (SImode);
8396 else
8397 gcc_assert (REGNO (reg) != R0_REGNUM);
8398
8399 emit_move_insn (reg, reg0);
8400
8401 return reg;
8402 }
8403
8404 rtx
8405 legitimize_tls_address (rtx x, rtx reg)
8406 {
8407 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8408 rtx_insn *insns;
8409 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8410
8411 switch (model)
8412 {
8413 case TLS_MODEL_GLOBAL_DYNAMIC:
8414 if (TARGET_GNU2_TLS)
8415 {
8416 reg = arm_tls_descseq_addr (x, reg);
8417
8418 tp = arm_load_tp (NULL_RTX);
8419
8420 dest = gen_rtx_PLUS (Pmode, tp, reg);
8421 }
8422 else
8423 {
8424 /* Original scheme */
8425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8426 dest = gen_reg_rtx (Pmode);
8427 emit_libcall_block (insns, dest, ret, x);
8428 }
8429 return dest;
8430
8431 case TLS_MODEL_LOCAL_DYNAMIC:
8432 if (TARGET_GNU2_TLS)
8433 {
8434 reg = arm_tls_descseq_addr (x, reg);
8435
8436 tp = arm_load_tp (NULL_RTX);
8437
8438 dest = gen_rtx_PLUS (Pmode, tp, reg);
8439 }
8440 else
8441 {
8442 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8443
8444 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8445 share the LDM result with other LD model accesses. */
8446 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8447 UNSPEC_TLS);
8448 dest = gen_reg_rtx (Pmode);
8449 emit_libcall_block (insns, dest, ret, eqv);
8450
8451 /* Load the addend. */
8452 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8453 GEN_INT (TLS_LDO32)),
8454 UNSPEC_TLS);
8455 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8456 dest = gen_rtx_PLUS (Pmode, dest, addend);
8457 }
8458 return dest;
8459
8460 case TLS_MODEL_INITIAL_EXEC:
8461 labelno = GEN_INT (pic_labelno++);
8462 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8463 label = gen_rtx_CONST (VOIDmode, label);
8464 sum = gen_rtx_UNSPEC (Pmode,
8465 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8466 GEN_INT (TARGET_ARM ? 8 : 4)),
8467 UNSPEC_TLS);
8468 reg = load_tls_operand (sum, reg);
8469
8470 if (TARGET_ARM)
8471 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8472 else if (TARGET_THUMB2)
8473 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8474 else
8475 {
8476 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8477 emit_move_insn (reg, gen_const_mem (SImode, reg));
8478 }
8479
8480 tp = arm_load_tp (NULL_RTX);
8481
8482 return gen_rtx_PLUS (Pmode, tp, reg);
8483
8484 case TLS_MODEL_LOCAL_EXEC:
8485 tp = arm_load_tp (NULL_RTX);
8486
8487 reg = gen_rtx_UNSPEC (Pmode,
8488 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8489 UNSPEC_TLS);
8490 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8491
8492 return gen_rtx_PLUS (Pmode, tp, reg);
8493
8494 default:
8495 abort ();
8496 }
8497 }
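
/* Example, for illustration only: an access to a thread-local variable
   such as

     __thread int counter;
     int get (void) { return counter; }

   reaches the expansion above, and the sequence emitted depends on the
   TLS model chosen for the symbol (global-dynamic, local-dynamic,
   initial-exec or local-exec).  */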
8498
8499 /* Try machine-dependent ways of modifying an illegitimate address
8500 to be legitimate. If we find one, return the new, valid address. */
8501 rtx
8502 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8503 {
8504 if (arm_tls_referenced_p (x))
8505 {
8506 rtx addend = NULL;
8507
8508 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8509 {
8510 addend = XEXP (XEXP (x, 0), 1);
8511 x = XEXP (XEXP (x, 0), 0);
8512 }
8513
8514 if (GET_CODE (x) != SYMBOL_REF)
8515 return x;
8516
8517 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8518
8519 x = legitimize_tls_address (x, NULL_RTX);
8520
8521 if (addend)
8522 {
8523 x = gen_rtx_PLUS (SImode, x, addend);
8524 orig_x = x;
8525 }
8526 else
8527 return x;
8528 }
8529
8530 if (!TARGET_ARM)
8531 {
8532 /* TODO: legitimize_address for Thumb2. */
8533 if (TARGET_THUMB2)
8534 return x;
8535 return thumb_legitimize_address (x, orig_x, mode);
8536 }
8537
8538 if (GET_CODE (x) == PLUS)
8539 {
8540 rtx xop0 = XEXP (x, 0);
8541 rtx xop1 = XEXP (x, 1);
8542
8543 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8544 xop0 = force_reg (SImode, xop0);
8545
8546 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8547 && !symbol_mentioned_p (xop1))
8548 xop1 = force_reg (SImode, xop1);
8549
8550 if (ARM_BASE_REGISTER_RTX_P (xop0)
8551 && CONST_INT_P (xop1))
8552 {
8553 HOST_WIDE_INT n, low_n;
8554 rtx base_reg, val;
8555 n = INTVAL (xop1);
8556
8557 /* VFP addressing modes actually allow greater offsets, but for
8558 now we just stick with the lowest common denominator. */
8559 if (mode == DImode || mode == DFmode)
8560 {
8561 low_n = n & 0x0f;
8562 n &= ~0x0f;
8563 if (low_n > 4)
8564 {
8565 n += 16;
8566 low_n -= 16;
8567 }
8568 }
8569 else
8570 {
8571 low_n = ((mode) == TImode ? 0
8572 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8573 n -= low_n;
8574 }
8575
8576 base_reg = gen_reg_rtx (SImode);
8577 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8578 emit_move_insn (base_reg, val);
8579 x = plus_constant (Pmode, base_reg, low_n);
8580 }
8581 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8582 x = gen_rtx_PLUS (SImode, xop0, xop1);
8583 }
8584
8585 /* XXX We don't allow MINUS any more -- see comment in
8586 arm_legitimate_address_outer_p (). */
8587 else if (GET_CODE (x) == MINUS)
8588 {
8589 rtx xop0 = XEXP (x, 0);
8590 rtx xop1 = XEXP (x, 1);
8591
8592 if (CONSTANT_P (xop0))
8593 xop0 = force_reg (SImode, xop0);
8594
8595 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8596 xop1 = force_reg (SImode, xop1);
8597
8598 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8599 x = gen_rtx_MINUS (SImode, xop0, xop1);
8600 }
8601
8602 /* Make sure to take full advantage of the pre-indexed addressing mode
8603 with absolute addresses, which often allows the base register to
8604 be factorized for multiple adjacent memory references, and might
8605 even allow the minipool to be avoided entirely. */
8606 else if (CONST_INT_P (x) && optimize > 0)
8607 {
8608 unsigned int bits;
8609 HOST_WIDE_INT mask, base, index;
8610 rtx base_reg;
8611
8612 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8613 use an 8-bit index. So let's use a 12-bit index for SImode only and
8614 hope that arm_gen_constant will enable ldrb to use more bits. */
8615 bits = (mode == SImode) ? 12 : 8;
8616 mask = (1 << bits) - 1;
8617 base = INTVAL (x) & ~mask;
8618 index = INTVAL (x) & mask;
8619 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8620 {
8621 /* It'll most probably be more efficient to generate the base
8622 with more bits set and use a negative index instead. */
8623 base |= mask;
8624 index -= mask;
8625 }
8626 base_reg = force_reg (SImode, GEN_INT (base));
8627 x = plus_constant (Pmode, base_reg, index);
8628 }
8629
8630 if (flag_pic)
8631 {
8632 /* We need to find and carefully transform any SYMBOL and LABEL
8633 references; so go back to the original address expression. */
8634 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8635
8636 if (new_x != orig_x)
8637 x = new_x;
8638 }
8639
8640 return x;
8641 }
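
/* Example, for illustration only (register names are arbitrary): a word
   access at a large constant offset such as base + 0x1234 does not fit
   the 12-bit ldr offset, so the code above splits it roughly as

     add r3, r0, #4096       @ 0x1000
     ldr r1, [r3, #564]      @ 0x234

   keeping the residual offset within the addressing-mode range.  */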
8642
8643
8644 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8645 to be legitimate. If we find one, return the new, valid address. */
8646 rtx
8647 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8648 {
8649 if (GET_CODE (x) == PLUS
8650 && CONST_INT_P (XEXP (x, 1))
8651 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8652 || INTVAL (XEXP (x, 1)) < 0))
8653 {
8654 rtx xop0 = XEXP (x, 0);
8655 rtx xop1 = XEXP (x, 1);
8656 HOST_WIDE_INT offset = INTVAL (xop1);
8657
8658 /* Try and fold the offset into a biasing of the base register and
8659 then offsetting that. Don't do this when optimizing for space
8660 since it can cause too many CSEs. */
8661 if (optimize_size && offset >= 0
8662 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8663 {
8664 HOST_WIDE_INT delta;
8665
8666 if (offset >= 256)
8667 delta = offset - (256 - GET_MODE_SIZE (mode));
8668 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8669 delta = 31 * GET_MODE_SIZE (mode);
8670 else
8671 delta = offset & (~31 * GET_MODE_SIZE (mode));
8672
8673 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8674 NULL_RTX);
8675 x = plus_constant (Pmode, xop0, delta);
8676 }
8677 else if (offset < 0 && offset > -256)
8678 /* Small negative offsets are best done with a subtract before the
8679 dereference, since forcing these into a register normally takes
8680 two instructions. */
8681 x = force_operand (x, NULL_RTX);
8682 else
8683 {
8684 /* For the remaining cases, force the constant into a register. */
8685 xop1 = force_reg (SImode, xop1);
8686 x = gen_rtx_PLUS (SImode, xop0, xop1);
8687 }
8688 }
8689 else if (GET_CODE (x) == PLUS
8690 && s_register_operand (XEXP (x, 1), SImode)
8691 && !s_register_operand (XEXP (x, 0), SImode))
8692 {
8693 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8694
8695 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8696 }
8697
8698 if (flag_pic)
8699 {
8700 /* We need to find and carefully transform any SYMBOL and LABEL
8701 references; so go back to the original address expression. */
8702 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8703
8704 if (new_x != orig_x)
8705 x = new_x;
8706 }
8707
8708 return x;
8709 }
8710
8711 /* Return TRUE if X contains any TLS symbol references. */
8712
8713 bool
8714 arm_tls_referenced_p (rtx x)
8715 {
8716 if (! TARGET_HAVE_TLS)
8717 return false;
8718
8719 subrtx_iterator::array_type array;
8720 FOR_EACH_SUBRTX (iter, array, x, ALL)
8721 {
8722 const_rtx x = *iter;
8723 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8724 {
8725 /* ARM currently does not provide relocations to encode TLS variables
8726 into AArch32 instructions, only data, so there is currently no way
8727 to implement these if the literal pool is disabled. */
8728 if (arm_disable_literal_pool)
8729 sorry ("accessing thread-local storage is not currently supported "
8730 "with -mpure-code or -mslow-flash-data");
8731
8732 return true;
8733 }
8734
8735 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8736 TLS offsets, not real symbol references. */
8737 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8738 iter.skip_subrtxes ();
8739 }
8740 return false;
8741 }
8742
8743 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8744
8745 On the ARM, allow any integer (invalid ones are removed later by insn
8746 patterns), nice doubles, and symbol_refs which refer to the function's
8747 constant pool XXX.
8748
8749 When generating PIC, allow anything. */
8750
8751 static bool
8752 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8753 {
8754 return flag_pic || !label_mentioned_p (x);
8755 }
8756
8757 static bool
8758 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8759 {
8760 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8761 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8762 for ARMv8-M Baseline or later, the result is valid. */
8763 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8764 x = XEXP (x, 0);
8765
8766 return (CONST_INT_P (x)
8767 || CONST_DOUBLE_P (x)
8768 || CONSTANT_ADDRESS_P (x)
8769 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8770 || flag_pic);
8771 }
8772
8773 static bool
8774 arm_legitimate_constant_p (machine_mode mode, rtx x)
8775 {
8776 return (!arm_cannot_force_const_mem (mode, x)
8777 && (TARGET_32BIT
8778 ? arm_legitimate_constant_p_1 (mode, x)
8779 : thumb_legitimate_constant_p (mode, x)));
8780 }
8781
8782 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8783
8784 static bool
8785 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8786 {
8787 rtx base, offset;
8788
8789 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8790 {
8791 split_const (x, &base, &offset);
8792 if (GET_CODE (base) == SYMBOL_REF
8793 && !offset_within_block_p (base, INTVAL (offset)))
8794 return true;
8795 }
8796 return arm_tls_referenced_p (x);
8797 }
8798 \f
8799 #define REG_OR_SUBREG_REG(X) \
8800 (REG_P (X) \
8801 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8802
8803 #define REG_OR_SUBREG_RTX(X) \
8804 (REG_P (X) ? (X) : SUBREG_REG (X))
8805
8806 static inline int
8807 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8808 {
8809 machine_mode mode = GET_MODE (x);
8810 int total, words;
8811
8812 switch (code)
8813 {
8814 case ASHIFT:
8815 case ASHIFTRT:
8816 case LSHIFTRT:
8817 case ROTATERT:
8818 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8819
8820 case PLUS:
8821 case MINUS:
8822 case COMPARE:
8823 case NEG:
8824 case NOT:
8825 return COSTS_N_INSNS (1);
8826
8827 case MULT:
8828 if (arm_arch6m && arm_m_profile_small_mul)
8829 return COSTS_N_INSNS (32);
8830
8831 if (CONST_INT_P (XEXP (x, 1)))
8832 {
8833 int cycles = 0;
8834 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8835
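/* The loop below models early termination in the multiplier: roughly two
   bits of the constant operand are retired per cycle. */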
8836 while (i)
8837 {
8838 i >>= 2;
8839 cycles++;
8840 }
8841 return COSTS_N_INSNS (2) + cycles;
8842 }
8843 return COSTS_N_INSNS (1) + 16;
8844
8845 case SET:
8846 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8847 the mode. */
8848 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8849 return (COSTS_N_INSNS (words)
8850 + 4 * ((MEM_P (SET_SRC (x)))
8851 + MEM_P (SET_DEST (x))));
8852
8853 case CONST_INT:
8854 if (outer == SET)
8855 {
8856 if (UINTVAL (x) < 256
8857 /* 16-bit constant. */
8858 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8859 return 0;
8860 if (thumb_shiftable_const (INTVAL (x)))
8861 return COSTS_N_INSNS (2);
8862 return COSTS_N_INSNS (3);
8863 }
8864 else if ((outer == PLUS || outer == COMPARE)
8865 && INTVAL (x) < 256 && INTVAL (x) > -256)
8866 return 0;
8867 else if ((outer == IOR || outer == XOR || outer == AND)
8868 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8869 return COSTS_N_INSNS (1);
8870 else if (outer == AND)
8871 {
8872 int i;
8873 /* This duplicates the tests in the andsi3 expander. */
8874 for (i = 9; i <= 31; i++)
8875 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8876 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8877 return COSTS_N_INSNS (2);
8878 }
8879 else if (outer == ASHIFT || outer == ASHIFTRT
8880 || outer == LSHIFTRT)
8881 return 0;
8882 return COSTS_N_INSNS (2);
8883
8884 case CONST:
8885 case CONST_DOUBLE:
8886 case LABEL_REF:
8887 case SYMBOL_REF:
8888 return COSTS_N_INSNS (3);
8889
8890 case UDIV:
8891 case UMOD:
8892 case DIV:
8893 case MOD:
8894 return 100;
8895
8896 case TRUNCATE:
8897 return 99;
8898
8899 case AND:
8900 case XOR:
8901 case IOR:
8902 /* XXX guess. */
8903 return 8;
8904
8905 case MEM:
8906 /* XXX another guess. */
8907 /* Memory costs quite a lot for the first word, but subsequent words
8908 load at the equivalent of a single insn each. */
8909 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8910 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8911 ? 4 : 0));
8912
8913 case IF_THEN_ELSE:
8914 /* XXX a guess. */
8915 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8916 return 14;
8917 return 2;
8918
8919 case SIGN_EXTEND:
8920 case ZERO_EXTEND:
8921 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8922 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8923
8924 if (mode == SImode)
8925 return total;
8926
8927 if (arm_arch6)
8928 return total + COSTS_N_INSNS (1);
8929
8930 /* Assume a two-shift sequence. Increase the cost slightly so
8931 we prefer actual shifts over an extend operation. */
8932 return total + 1 + COSTS_N_INSNS (2);
8933
8934 default:
8935 return 99;
8936 }
8937 }
8938
8939 /* Estimate the size cost of thumb1 instructions.
8940 For now most of the code is copied from thumb1_rtx_costs. We need
8941 finer-grained tuning when we have more related test cases. */
8942 static inline int
8943 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8944 {
8945 machine_mode mode = GET_MODE (x);
8946 int words, cost;
8947
8948 switch (code)
8949 {
8950 case ASHIFT:
8951 case ASHIFTRT:
8952 case LSHIFTRT:
8953 case ROTATERT:
8954 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8955
8956 case PLUS:
8957 case MINUS:
8958 /* Thumb-1 needs two instructions to fulfill the shiftadd/shiftsub0/shiftsub1
8959 patterns defined by RTL expansion, especially for the expansion of
8960 multiplication. */
8961 if ((GET_CODE (XEXP (x, 0)) == MULT
8962 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8963 || (GET_CODE (XEXP (x, 1)) == MULT
8964 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8965 return COSTS_N_INSNS (2);
8966 /* Fall through. */
8967 case COMPARE:
8968 case NEG:
8969 case NOT:
8970 return COSTS_N_INSNS (1);
8971
8972 case MULT:
8973 if (CONST_INT_P (XEXP (x, 1)))
8974 {
8975 /* The Thumb1 mul instruction can't operate on a constant. We must
8976 load it into a register first. */
8977 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8978 /* For targets that have a very small and high-latency multiply
8979 unit, we prefer to synthesize the mult with up to 5 instructions,
8980 giving a good balance between size and performance. */
8981 if (arm_arch6m && arm_m_profile_small_mul)
8982 return COSTS_N_INSNS (5);
8983 else
8984 return COSTS_N_INSNS (1) + const_size;
8985 }
8986 return COSTS_N_INSNS (1);
8987
8988 case SET:
8989 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8990 the mode. */
8991 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8992 cost = COSTS_N_INSNS (words);
8993 if (satisfies_constraint_J (SET_SRC (x))
8994 || satisfies_constraint_K (SET_SRC (x))
8995 /* Too big an immediate for a 2-byte mov; use MOVW instead. */
8996 || (CONST_INT_P (SET_SRC (x))
8997 && UINTVAL (SET_SRC (x)) >= 256
8998 && TARGET_HAVE_MOVT
8999 && satisfies_constraint_j (SET_SRC (x)))
9000 /* thumb1_movdi_insn. */
9001 || ((words > 1) && MEM_P (SET_SRC (x))))
9002 cost += COSTS_N_INSNS (1);
9003 return cost;
9004
9005 case CONST_INT:
9006 if (outer == SET)
9007 {
9008 if (UINTVAL (x) < 256)
9009 return COSTS_N_INSNS (1);
9010 /* movw is 4 bytes long. */
9011 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9014 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9015 return COSTS_N_INSNS (2);
9016 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9017 if (thumb_shiftable_const (INTVAL (x)))
9018 return COSTS_N_INSNS (2);
9019 return COSTS_N_INSNS (3);
9020 }
9021 else if ((outer == PLUS || outer == COMPARE)
9022 && INTVAL (x) < 256 && INTVAL (x) > -256)
9023 return 0;
9024 else if ((outer == IOR || outer == XOR || outer == AND)
9025 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9026 return COSTS_N_INSNS (1);
9027 else if (outer == AND)
9028 {
9029 int i;
9030 /* This duplicates the tests in the andsi3 expander. */
9031 for (i = 9; i <= 31; i++)
9032 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9033 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9034 return COSTS_N_INSNS (2);
9035 }
9036 else if (outer == ASHIFT || outer == ASHIFTRT
9037 || outer == LSHIFTRT)
9038 return 0;
9039 return COSTS_N_INSNS (2);
9040
9041 case CONST:
9042 case CONST_DOUBLE:
9043 case LABEL_REF:
9044 case SYMBOL_REF:
9045 return COSTS_N_INSNS (3);
9046
9047 case UDIV:
9048 case UMOD:
9049 case DIV:
9050 case MOD:
9051 return 100;
9052
9053 case TRUNCATE:
9054 return 99;
9055
9056 case AND:
9057 case XOR:
9058 case IOR:
9059 return COSTS_N_INSNS (1);
9060
9061 case MEM:
9062 return (COSTS_N_INSNS (1)
9063 + COSTS_N_INSNS (1)
9064 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9065 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9066 ? COSTS_N_INSNS (1) : 0));
9067
9068 case IF_THEN_ELSE:
9069 /* XXX a guess. */
9070 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9071 return 14;
9072 return 2;
9073
9074 case ZERO_EXTEND:
9075 /* XXX still guessing. */
9076 switch (GET_MODE (XEXP (x, 0)))
9077 {
9078 case E_QImode:
9079 return (1 + (mode == DImode ? 4 : 0)
9080 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9081
9082 case E_HImode:
9083 return (4 + (mode == DImode ? 4 : 0)
9084 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9085
9086 case E_SImode:
9087 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9088
9089 default:
9090 return 99;
9091 }
9092
9093 default:
9094 return 99;
9095 }
9096 }
9097
9098 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9099 operand, then return the operand that is being shifted. If the shift
9100 is not by a constant, then set *SHIFT_REG to the rtx of the shift amount.
9101 Return NULL if OP is not a shifter operand. */
9102 static rtx
9103 shifter_op_p (rtx op, rtx *shift_reg)
9104 {
9105 enum rtx_code code = GET_CODE (op);
9106
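/* A multiply by a power of two is costed as a shift, e.g. (mult X 8) is
   handled like (ashift X 3). */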
9107 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9108 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9109 return XEXP (op, 0);
9110 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9111 return XEXP (op, 0);
9112 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9113 || code == ASHIFTRT)
9114 {
9115 if (!CONST_INT_P (XEXP (op, 1)))
9116 *shift_reg = XEXP (op, 1);
9117 return XEXP (op, 0);
9118 }
9119
9120 return NULL;
9121 }
9122
9123 static bool
9124 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9125 {
9126 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9127 rtx_code code = GET_CODE (x);
9128 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9129
9130 switch (XINT (x, 1))
9131 {
9132 case UNSPEC_UNALIGNED_LOAD:
9133 /* We can only do unaligned loads into the integer unit, and we can't
9134 use LDM or LDRD. */
9135 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9136 if (speed_p)
9137 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9138 + extra_cost->ldst.load_unaligned);
9139
9140 #ifdef NOT_YET
9141 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9142 ADDR_SPACE_GENERIC, speed_p);
9143 #endif
9144 return true;
9145
9146 case UNSPEC_UNALIGNED_STORE:
9147 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9148 if (speed_p)
9149 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9150 + extra_cost->ldst.store_unaligned);
9151
9152 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9153 #ifdef NOT_YET
9154 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9155 ADDR_SPACE_GENERIC, speed_p);
9156 #endif
9157 return true;
9158
9159 case UNSPEC_VRINTZ:
9160 case UNSPEC_VRINTP:
9161 case UNSPEC_VRINTM:
9162 case UNSPEC_VRINTR:
9163 case UNSPEC_VRINTX:
9164 case UNSPEC_VRINTA:
9165 if (speed_p)
9166 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9167
9168 return true;
9169 default:
9170 *cost = COSTS_N_INSNS (2);
9171 break;
9172 }
9173 return true;
9174 }
9175
9176 /* Cost of a libcall. We assume one insn per argument, an amount for the
9177 call (one insn for -Os) and then one for processing the result. */
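/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) when optimizing for size. */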
9178 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9179
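/* Helper for the narrow-mode (QImode/HImode) PLUS and MINUS cases below: if
   operand IDX of X is a left-shift-style shifter operand, add the cost of an
   arithmetic-with-shift operation (plus the cost of the shift amount when it
   is not a constant) and the costs of both operands, then return from the
   caller. */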
9180 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9181 do \
9182 { \
9183 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9184 if (shift_op != NULL \
9185 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9186 { \
9187 if (shift_reg) \
9188 { \
9189 if (speed_p) \
9190 *cost += extra_cost->alu.arith_shift_reg; \
9191 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9192 ASHIFT, 1, speed_p); \
9193 } \
9194 else if (speed_p) \
9195 *cost += extra_cost->alu.arith_shift; \
9196 \
9197 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9198 ASHIFT, 0, speed_p) \
9199 + rtx_cost (XEXP (x, 1 - IDX), \
9200 GET_MODE (shift_op), \
9201 OP, 1, speed_p)); \
9202 return true; \
9203 } \
9204 } \
9205 while (0);
9206
9207 /* RTX costs. Make an estimate of the cost of executing the operation
9208 X, which is contained within an operation with code OUTER_CODE.
9209 SPEED_P indicates whether the cost desired is the performance cost,
9210 or the size cost. The estimate is stored in COST and the return
9211 value is TRUE if the cost calculation is final, or FALSE if the
9212 caller should recurse through the operands of X to add additional
9213 costs.
9214
9215 We currently make no attempt to model the size savings of Thumb-2
9216 16-bit instructions. At the normal points in compilation where
9217 this code is called we have no measure of whether the condition
9218 flags are live or not, and thus no realistic way to determine what
9219 the size will eventually be. */
9220 static bool
9221 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9222 const struct cpu_cost_table *extra_cost,
9223 int *cost, bool speed_p)
9224 {
9225 machine_mode mode = GET_MODE (x);
9226
9227 *cost = COSTS_N_INSNS (1);
9228
9229 if (TARGET_THUMB1)
9230 {
9231 if (speed_p)
9232 *cost = thumb1_rtx_costs (x, code, outer_code);
9233 else
9234 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9235 return true;
9236 }
9237
9238 switch (code)
9239 {
9240 case SET:
9241 *cost = 0;
9242 /* SET RTXs don't have a mode so we get it from the destination. */
9243 mode = GET_MODE (SET_DEST (x));
9244
9245 if (REG_P (SET_SRC (x))
9246 && REG_P (SET_DEST (x)))
9247 {
9248 /* Assume that most copies can be done with a single insn,
9249 unless we don't have HW FP, in which case everything
9250 larger than word mode will require two insns. */
9251 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9252 && GET_MODE_SIZE (mode) > 4)
9253 || mode == DImode)
9254 ? 2 : 1);
9255 /* Conditional register moves can be encoded
9256 in 16 bits in Thumb mode. */
9257 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9258 *cost >>= 1;
9259
9260 return true;
9261 }
9262
9263 if (CONST_INT_P (SET_SRC (x)))
9264 {
9265 /* Handle CONST_INT here, since the value doesn't have a mode
9266 and we would otherwise be unable to work out the true cost. */
9267 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9268 0, speed_p);
9269 outer_code = SET;
9270 /* Slightly lower the cost of setting a core reg to a constant.
9271 This helps break up chains and allows for better scheduling. */
9272 if (REG_P (SET_DEST (x))
9273 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9274 *cost -= 1;
9275 x = SET_SRC (x);
9276 /* Moves of an immediate in the range [0, 255] can be
9277 encoded in 16 bits in Thumb mode. */
9278 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9279 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9280 *cost >>= 1;
9281 goto const_int_cost;
9282 }
9283
9284 return false;
9285
9286 case MEM:
9287 /* A memory access costs one insn if the mode is small or the address is
9288 a single register; otherwise it costs one insn per word. */
9289 if (REG_P (XEXP (x, 0)))
9290 *cost = COSTS_N_INSNS (1);
9291 else if (flag_pic
9292 && GET_CODE (XEXP (x, 0)) == PLUS
9293 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9294 /* This will be split into two instructions.
9295 See arm.md:calculate_pic_address. */
9296 *cost = COSTS_N_INSNS (2);
9297 else
9298 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9299
9300 /* For speed optimizations, add the costs of the address and
9301 accessing memory. */
9302 if (speed_p)
9303 #ifdef NOT_YET
9304 *cost += (extra_cost->ldst.load
9305 + arm_address_cost (XEXP (x, 0), mode,
9306 ADDR_SPACE_GENERIC, speed_p));
9307 #else
9308 *cost += extra_cost->ldst.load;
9309 #endif
9310 return true;
9311
9312 case PARALLEL:
9313 {
9314 /* Calculations of LDM costs are complex. We assume an initial cost
9315 (ldm_1st) which will load the number of registers mentioned in
9316 ldm_regs_per_insn_1st registers; then each additional
9317 ldm_regs_per_insn_subsequent registers cost one more insn. The
9318 formula for N regs is thus:
9319
9320 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9321 + ldm_regs_per_insn_subsequent - 1)
9322 / ldm_regs_per_insn_subsequent).
9323
9324 Additional costs may also be added for addressing. A similar
9325 formula is used for STM. */
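/* Using the formula above with ldm_regs_per_insn_1st == 2,
   ldm_regs_per_insn_subsequent == 2 and N == 6 registers, the additional
   term is COSTS_N_INSNS ((4 + 1) / 2) == COSTS_N_INSNS (2). */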
9326
9327 bool is_ldm = load_multiple_operation (x, SImode);
9328 bool is_stm = store_multiple_operation (x, SImode);
9329
9330 if (is_ldm || is_stm)
9331 {
9332 if (speed_p)
9333 {
9334 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9335 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9336 ? extra_cost->ldst.ldm_regs_per_insn_1st
9337 : extra_cost->ldst.stm_regs_per_insn_1st;
9338 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9339 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9340 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9341
9342 *cost += regs_per_insn_1st
9343 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9344 + regs_per_insn_sub - 1)
9345 / regs_per_insn_sub);
9346 return true;
9347 }
9348
9349 }
9350 return false;
9351 }
9352 case DIV:
9353 case UDIV:
9354 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9355 && (mode == SFmode || !TARGET_VFP_SINGLE))
9356 *cost += COSTS_N_INSNS (speed_p
9357 ? extra_cost->fp[mode != SFmode].div : 0);
9358 else if (mode == SImode && TARGET_IDIV)
9359 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9360 else
9361 *cost = LIBCALL_COST (2);
9362
9363 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9364 are possible, udiv is preferred. */
9365 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9366 return false; /* All arguments must be in registers. */
9367
9368 case MOD:
9369 /* MOD by a power of 2 can be expanded as:
9370 rsbs r1, r0, #0
9371 and r0, r0, #(n - 1)
9372 and r1, r1, #(n - 1)
9373 rsbpl r0, r1, #0. */
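/* The sequence above computes the C signed remainder, so the result takes
   the sign of the dividend: for example -5 % 4 yields -1. */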
9374 if (CONST_INT_P (XEXP (x, 1))
9375 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9376 && mode == SImode)
9377 {
9378 *cost += COSTS_N_INSNS (3);
9379
9380 if (speed_p)
9381 *cost += 2 * extra_cost->alu.logical
9382 + extra_cost->alu.arith;
9383 return true;
9384 }
9385
9386 /* Fall-through. */
9387 case UMOD:
9388 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9389 are possible, udiv is preferred. */
9390 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9391 return false; /* All arguments must be in registers. */
9392
9393 case ROTATE:
9394 if (mode == SImode && REG_P (XEXP (x, 1)))
9395 {
9396 *cost += (COSTS_N_INSNS (1)
9397 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9398 if (speed_p)
9399 *cost += extra_cost->alu.shift_reg;
9400 return true;
9401 }
9402 /* Fall through */
9403 case ROTATERT:
9404 case ASHIFT:
9405 case LSHIFTRT:
9406 case ASHIFTRT:
9407 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9408 {
9409 *cost += (COSTS_N_INSNS (2)
9410 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9411 if (speed_p)
9412 *cost += 2 * extra_cost->alu.shift;
9413 return true;
9414 }
9415 else if (mode == SImode)
9416 {
9417 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9418 /* Slightly disparage register shifts at -Os, but not by much. */
9419 if (!CONST_INT_P (XEXP (x, 1)))
9420 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9421 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9422 return true;
9423 }
9424 else if (GET_MODE_CLASS (mode) == MODE_INT
9425 && GET_MODE_SIZE (mode) < 4)
9426 {
9427 if (code == ASHIFT)
9428 {
9429 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9430 /* Slightly disparage register shifts at -Os, but not by
9431 much. */
9432 if (!CONST_INT_P (XEXP (x, 1)))
9433 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9434 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9435 }
9436 else if (code == LSHIFTRT || code == ASHIFTRT)
9437 {
9438 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9439 {
9440 /* Can use SBFX/UBFX. */
9441 if (speed_p)
9442 *cost += extra_cost->alu.bfx;
9443 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9444 }
9445 else
9446 {
9447 *cost += COSTS_N_INSNS (1);
9448 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9449 if (speed_p)
9450 {
9451 if (CONST_INT_P (XEXP (x, 1)))
9452 *cost += 2 * extra_cost->alu.shift;
9453 else
9454 *cost += (extra_cost->alu.shift
9455 + extra_cost->alu.shift_reg);
9456 }
9457 else
9458 /* Slightly disparage register shifts. */
9459 *cost += !CONST_INT_P (XEXP (x, 1));
9460 }
9461 }
9462 else /* Rotates. */
9463 {
9464 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9465 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9466 if (speed_p)
9467 {
9468 if (CONST_INT_P (XEXP (x, 1)))
9469 *cost += (2 * extra_cost->alu.shift
9470 + extra_cost->alu.log_shift);
9471 else
9472 *cost += (extra_cost->alu.shift
9473 + extra_cost->alu.shift_reg
9474 + extra_cost->alu.log_shift_reg);
9475 }
9476 }
9477 return true;
9478 }
9479
9480 *cost = LIBCALL_COST (2);
9481 return false;
9482
9483 case BSWAP:
9484 if (arm_arch6)
9485 {
9486 if (mode == SImode)
9487 {
9488 if (speed_p)
9489 *cost += extra_cost->alu.rev;
9490
9491 return false;
9492 }
9493 }
9494 else
9495 {
9496 /* No rev instruction available. Look at arm_legacy_rev
9497 and thumb_legacy_rev for the form of RTL used then. */
9498 if (TARGET_THUMB)
9499 {
9500 *cost += COSTS_N_INSNS (9);
9501
9502 if (speed_p)
9503 {
9504 *cost += 6 * extra_cost->alu.shift;
9505 *cost += 3 * extra_cost->alu.logical;
9506 }
9507 }
9508 else
9509 {
9510 *cost += COSTS_N_INSNS (4);
9511
9512 if (speed_p)
9513 {
9514 *cost += 2 * extra_cost->alu.shift;
9515 *cost += extra_cost->alu.arith_shift;
9516 *cost += 2 * extra_cost->alu.logical;
9517 }
9518 }
9519 return true;
9520 }
9521 return false;
9522
9523 case MINUS:
9524 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9525 && (mode == SFmode || !TARGET_VFP_SINGLE))
9526 {
9527 if (GET_CODE (XEXP (x, 0)) == MULT
9528 || GET_CODE (XEXP (x, 1)) == MULT)
9529 {
9530 rtx mul_op0, mul_op1, sub_op;
9531
9532 if (speed_p)
9533 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9534
9535 if (GET_CODE (XEXP (x, 0)) == MULT)
9536 {
9537 mul_op0 = XEXP (XEXP (x, 0), 0);
9538 mul_op1 = XEXP (XEXP (x, 0), 1);
9539 sub_op = XEXP (x, 1);
9540 }
9541 else
9542 {
9543 mul_op0 = XEXP (XEXP (x, 1), 0);
9544 mul_op1 = XEXP (XEXP (x, 1), 1);
9545 sub_op = XEXP (x, 0);
9546 }
9547
9548 /* The first operand of the multiply may optionally be
9549 negated. */
9550 if (GET_CODE (mul_op0) == NEG)
9551 mul_op0 = XEXP (mul_op0, 0);
9552
9553 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9554 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9555 + rtx_cost (sub_op, mode, code, 0, speed_p));
9556
9557 return true;
9558 }
9559
9560 if (speed_p)
9561 *cost += extra_cost->fp[mode != SFmode].addsub;
9562 return false;
9563 }
9564
9565 if (mode == SImode)
9566 {
9567 rtx shift_by_reg = NULL;
9568 rtx shift_op;
9569 rtx non_shift_op;
9570
9571 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9572 if (shift_op == NULL)
9573 {
9574 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9575 non_shift_op = XEXP (x, 0);
9576 }
9577 else
9578 non_shift_op = XEXP (x, 1);
9579
9580 if (shift_op != NULL)
9581 {
9582 if (shift_by_reg != NULL)
9583 {
9584 if (speed_p)
9585 *cost += extra_cost->alu.arith_shift_reg;
9586 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9587 }
9588 else if (speed_p)
9589 *cost += extra_cost->alu.arith_shift;
9590
9591 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9592 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9593 return true;
9594 }
9595
9596 if (arm_arch_thumb2
9597 && GET_CODE (XEXP (x, 1)) == MULT)
9598 {
9599 /* MLS. */
9600 if (speed_p)
9601 *cost += extra_cost->mult[0].add;
9602 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9603 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9604 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9605 return true;
9606 }
9607
9608 if (CONST_INT_P (XEXP (x, 0)))
9609 {
9610 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9611 INTVAL (XEXP (x, 0)), NULL_RTX,
9612 NULL_RTX, 1, 0);
9613 *cost = COSTS_N_INSNS (insns);
9614 if (speed_p)
9615 *cost += insns * extra_cost->alu.arith;
9616 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9617 return true;
9618 }
9619 else if (speed_p)
9620 *cost += extra_cost->alu.arith;
9621
9622 return false;
9623 }
9624
9625 if (GET_MODE_CLASS (mode) == MODE_INT
9626 && GET_MODE_SIZE (mode) < 4)
9627 {
9628 rtx shift_op, shift_reg;
9629 shift_reg = NULL;
9630
9631 /* We check both sides of the MINUS for shifter operands since,
9632 unlike PLUS, it's not commutative. */
9633
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9635 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9636
9637 /* Slightly disparage, as we might need to widen the result. */
9638 *cost += 1;
9639 if (speed_p)
9640 *cost += extra_cost->alu.arith;
9641
9642 if (CONST_INT_P (XEXP (x, 0)))
9643 {
9644 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9645 return true;
9646 }
9647
9648 return false;
9649 }
9650
9651 if (mode == DImode)
9652 {
9653 *cost += COSTS_N_INSNS (1);
9654
9655 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9656 {
9657 rtx op1 = XEXP (x, 1);
9658
9659 if (speed_p)
9660 *cost += 2 * extra_cost->alu.arith;
9661
9662 if (GET_CODE (op1) == ZERO_EXTEND)
9663 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9664 0, speed_p);
9665 else
9666 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9667 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9668 0, speed_p);
9669 return true;
9670 }
9671 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9672 {
9673 if (speed_p)
9674 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9675 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9676 0, speed_p)
9677 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9678 return true;
9679 }
9680 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9681 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9682 {
9683 if (speed_p)
9684 *cost += (extra_cost->alu.arith
9685 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9686 ? extra_cost->alu.arith
9687 : extra_cost->alu.arith_shift));
9688 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9689 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9690 GET_CODE (XEXP (x, 1)), 0, speed_p));
9691 return true;
9692 }
9693
9694 if (speed_p)
9695 *cost += 2 * extra_cost->alu.arith;
9696 return false;
9697 }
9698
9699 /* Vector mode? */
9700
9701 *cost = LIBCALL_COST (2);
9702 return false;
9703
9704 case PLUS:
9705 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9706 && (mode == SFmode || !TARGET_VFP_SINGLE))
9707 {
9708 if (GET_CODE (XEXP (x, 0)) == MULT)
9709 {
9710 rtx mul_op0, mul_op1, add_op;
9711
9712 if (speed_p)
9713 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9714
9715 mul_op0 = XEXP (XEXP (x, 0), 0);
9716 mul_op1 = XEXP (XEXP (x, 0), 1);
9717 add_op = XEXP (x, 1);
9718
9719 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9720 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9721 + rtx_cost (add_op, mode, code, 0, speed_p));
9722
9723 return true;
9724 }
9725
9726 if (speed_p)
9727 *cost += extra_cost->fp[mode != SFmode].addsub;
9728 return false;
9729 }
9730 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9731 {
9732 *cost = LIBCALL_COST (2);
9733 return false;
9734 }
9735
9736 /* Narrow modes can be synthesized in SImode, but the range
9737 of useful sub-operations is limited. Check for shift operations
9738 on one of the operands. Only left shifts can be used in the
9739 narrow modes. */
9740 if (GET_MODE_CLASS (mode) == MODE_INT
9741 && GET_MODE_SIZE (mode) < 4)
9742 {
9743 rtx shift_op, shift_reg;
9744 shift_reg = NULL;
9745
9746 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9747
9748 if (CONST_INT_P (XEXP (x, 1)))
9749 {
9750 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9751 INTVAL (XEXP (x, 1)), NULL_RTX,
9752 NULL_RTX, 1, 0);
9753 *cost = COSTS_N_INSNS (insns);
9754 if (speed_p)
9755 *cost += insns * extra_cost->alu.arith;
9756 /* Slightly penalize a narrow operation as the result may
9757 need widening. */
9758 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9759 return true;
9760 }
9761
9762 /* Slightly penalize a narrow operation as the result may
9763 need widening. */
9764 *cost += 1;
9765 if (speed_p)
9766 *cost += extra_cost->alu.arith;
9767
9768 return false;
9769 }
9770
9771 if (mode == SImode)
9772 {
9773 rtx shift_op, shift_reg;
9774
9775 if (TARGET_INT_SIMD
9776 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9778 {
9779 /* UXTA[BH] or SXTA[BH]. */
9780 if (speed_p)
9781 *cost += extra_cost->alu.extend_arith;
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9783 0, speed_p)
9784 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9785 return true;
9786 }
9787
9788 shift_reg = NULL;
9789 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9790 if (shift_op != NULL)
9791 {
9792 if (shift_reg)
9793 {
9794 if (speed_p)
9795 *cost += extra_cost->alu.arith_shift_reg;
9796 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9797 }
9798 else if (speed_p)
9799 *cost += extra_cost->alu.arith_shift;
9800
9801 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9802 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9803 return true;
9804 }
9805 if (GET_CODE (XEXP (x, 0)) == MULT)
9806 {
9807 rtx mul_op = XEXP (x, 0);
9808
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9822 == 16))))))
9823 {
9824 /* SMLA[BT][BT]. */
9825 if (speed_p)
9826 *cost += extra_cost->mult[0].extend_add;
9827 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9828 SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9832 return true;
9833 }
9834
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].add;
9837 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9838 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9839 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9840 return true;
9841 }
9842 if (CONST_INT_P (XEXP (x, 1)))
9843 {
9844 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9845 INTVAL (XEXP (x, 1)), NULL_RTX,
9846 NULL_RTX, 1, 0);
9847 *cost = COSTS_N_INSNS (insns);
9848 if (speed_p)
9849 *cost += insns * extra_cost->alu.arith;
9850 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9851 return true;
9852 }
9853 else if (speed_p)
9854 *cost += extra_cost->alu.arith;
9855
9856 return false;
9857 }
9858
9859 if (mode == DImode)
9860 {
9861 if (arm_arch3m
9862 && GET_CODE (XEXP (x, 0)) == MULT
9863 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9864 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9865 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9866 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9867 {
9868 if (speed_p)
9869 *cost += extra_cost->mult[1].extend_add;
9870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9873 ZERO_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9875 return true;
9876 }
9877
9878 *cost += COSTS_N_INSNS (1);
9879
9880 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9882 {
9883 if (speed_p)
9884 *cost += (extra_cost->alu.arith
9885 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9886 ? extra_cost->alu.arith
9887 : extra_cost->alu.arith_shift));
9888
9889 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9890 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9892 return true;
9893 }
9894
9895 if (speed_p)
9896 *cost += 2 * extra_cost->alu.arith;
9897 return false;
9898 }
9899
9900 /* Vector mode? */
9901 *cost = LIBCALL_COST (2);
9902 return false;
9903 case IOR:
9904 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9905 {
9906 if (speed_p)
9907 *cost += extra_cost->alu.rev;
9908
9909 return true;
9910 }
9911 /* Fall through. */
9912 case AND: case XOR:
9913 if (mode == SImode)
9914 {
9915 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9916 rtx op0 = XEXP (x, 0);
9917 rtx shift_op, shift_reg;
9918
9919 if (subcode == NOT
9920 && (code == AND
9921 || (code == IOR && TARGET_THUMB2)))
9922 op0 = XEXP (op0, 0);
9923
9924 shift_reg = NULL;
9925 shift_op = shifter_op_p (op0, &shift_reg);
9926 if (shift_op != NULL)
9927 {
9928 if (shift_reg)
9929 {
9930 if (speed_p)
9931 *cost += extra_cost->alu.log_shift_reg;
9932 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9933 }
9934 else if (speed_p)
9935 *cost += extra_cost->alu.log_shift;
9936
9937 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9938 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9939 return true;
9940 }
9941
9942 if (CONST_INT_P (XEXP (x, 1)))
9943 {
9944 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9945 INTVAL (XEXP (x, 1)), NULL_RTX,
9946 NULL_RTX, 1, 0);
9947
9948 *cost = COSTS_N_INSNS (insns);
9949 if (speed_p)
9950 *cost += insns * extra_cost->alu.logical;
9951 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9952 return true;
9953 }
9954
9955 if (speed_p)
9956 *cost += extra_cost->alu.logical;
9957 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9958 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9959 return true;
9960 }
9961
9962 if (mode == DImode)
9963 {
9964 rtx op0 = XEXP (x, 0);
9965 enum rtx_code subcode = GET_CODE (op0);
9966
9967 *cost += COSTS_N_INSNS (1);
9968
9969 if (subcode == NOT
9970 && (code == AND
9971 || (code == IOR && TARGET_THUMB2)))
9972 op0 = XEXP (op0, 0);
9973
9974 if (GET_CODE (op0) == ZERO_EXTEND)
9975 {
9976 if (speed_p)
9977 *cost += 2 * extra_cost->alu.logical;
9978
9979 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9980 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9982 return true;
9983 }
9984 else if (GET_CODE (op0) == SIGN_EXTEND)
9985 {
9986 if (speed_p)
9987 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9988
9989 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9990 0, speed_p)
9991 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9992 return true;
9993 }
9994
9995 if (speed_p)
9996 *cost += 2 * extra_cost->alu.logical;
9997
9998 return true;
9999 }
10000 /* Vector mode? */
10001
10002 *cost = LIBCALL_COST (2);
10003 return false;
10004
10005 case MULT:
10006 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10007 && (mode == SFmode || !TARGET_VFP_SINGLE))
10008 {
10009 rtx op0 = XEXP (x, 0);
10010
10011 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10012 op0 = XEXP (op0, 0);
10013
10014 if (speed_p)
10015 *cost += extra_cost->fp[mode != SFmode].mult;
10016
10017 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10018 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10019 return true;
10020 }
10021 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10022 {
10023 *cost = LIBCALL_COST (2);
10024 return false;
10025 }
10026
10027 if (mode == SImode)
10028 {
10029 if (TARGET_DSP_MULTIPLY
10030 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10031 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10032 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10034 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10035 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10037 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10038 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10039 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10041 && (INTVAL (XEXP (XEXP (x, 1), 1))
10042 == 16))))))
10043 {
10044 /* SMUL[TB][TB]. */
10045 if (speed_p)
10046 *cost += extra_cost->mult[0].extend;
10047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10048 SIGN_EXTEND, 0, speed_p);
10049 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10050 SIGN_EXTEND, 1, speed_p);
10051 return true;
10052 }
10053 if (speed_p)
10054 *cost += extra_cost->mult[0].simple;
10055 return false;
10056 }
10057
10058 if (mode == DImode)
10059 {
10060 if (arm_arch3m
10061 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10063 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10065 {
10066 if (speed_p)
10067 *cost += extra_cost->mult[1].extend;
10068 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10069 ZERO_EXTEND, 0, speed_p)
10070 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10071 ZERO_EXTEND, 0, speed_p));
10072 return true;
10073 }
10074
10075 *cost = LIBCALL_COST (2);
10076 return false;
10077 }
10078
10079 /* Vector mode? */
10080 *cost = LIBCALL_COST (2);
10081 return false;
10082
10083 case NEG:
10084 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10085 && (mode == SFmode || !TARGET_VFP_SINGLE))
10086 {
10087 if (GET_CODE (XEXP (x, 0)) == MULT)
10088 {
10089 /* VNMUL. */
10090 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10091 return true;
10092 }
10093
10094 if (speed_p)
10095 *cost += extra_cost->fp[mode != SFmode].neg;
10096
10097 return false;
10098 }
10099 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10100 {
10101 *cost = LIBCALL_COST (1);
10102 return false;
10103 }
10104
10105 if (mode == SImode)
10106 {
10107 if (GET_CODE (XEXP (x, 0)) == ABS)
10108 {
10109 *cost += COSTS_N_INSNS (1);
10110 /* Assume the non-flag-changing variant. */
10111 if (speed_p)
10112 *cost += (extra_cost->alu.log_shift
10113 + extra_cost->alu.arith_shift);
10114 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10115 return true;
10116 }
10117
10118 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10119 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10120 {
10121 *cost += COSTS_N_INSNS (1);
10122 /* No extra cost for MOV imm and MVN imm. */
10123 /* If the comparison op is using the flags, there's no further
10124 cost; otherwise we need to add the cost of the comparison. */
10125 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10126 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10127 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10128 {
10129 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10130 *cost += (COSTS_N_INSNS (1)
10131 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10132 0, speed_p)
10133 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10134 1, speed_p));
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 }
10138 return true;
10139 }
10140
10141 if (speed_p)
10142 *cost += extra_cost->alu.arith;
10143 return false;
10144 }
10145
10146 if (GET_MODE_CLASS (mode) == MODE_INT
10147 && GET_MODE_SIZE (mode) < 4)
10148 {
10149 /* Slightly disparage, as we might need an extend operation. */
10150 *cost += 1;
10151 if (speed_p)
10152 *cost += extra_cost->alu.arith;
10153 return false;
10154 }
10155
10156 if (mode == DImode)
10157 {
10158 *cost += COSTS_N_INSNS (1);
10159 if (speed_p)
10160 *cost += 2 * extra_cost->alu.arith;
10161 return false;
10162 }
10163
10164 /* Vector mode? */
10165 *cost = LIBCALL_COST (1);
10166 return false;
10167
10168 case NOT:
10169 if (mode == SImode)
10170 {
10171 rtx shift_op;
10172 rtx shift_reg = NULL;
10173
10174 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10175
10176 if (shift_op)
10177 {
10178 if (shift_reg != NULL)
10179 {
10180 if (speed_p)
10181 *cost += extra_cost->alu.log_shift_reg;
10182 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10183 }
10184 else if (speed_p)
10185 *cost += extra_cost->alu.log_shift;
10186 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10187 return true;
10188 }
10189
10190 if (speed_p)
10191 *cost += extra_cost->alu.logical;
10192 return false;
10193 }
10194 if (mode == DImode)
10195 {
10196 *cost += COSTS_N_INSNS (1);
10197 return false;
10198 }
10199
10200 /* Vector mode? */
10201
10202 *cost += LIBCALL_COST (1);
10203 return false;
10204
10205 case IF_THEN_ELSE:
10206 {
10207 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10208 {
10209 *cost += COSTS_N_INSNS (3);
10210 return true;
10211 }
10212 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10213 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10214
10215 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10216 /* Assume that if one arm of the if_then_else is a register,
10217 it will be tied with the result and eliminate the
10218 conditional insn. */
10219 if (REG_P (XEXP (x, 1)))
10220 *cost += op2cost;
10221 else if (REG_P (XEXP (x, 2)))
10222 *cost += op1cost;
10223 else
10224 {
10225 if (speed_p)
10226 {
10227 if (extra_cost->alu.non_exec_costs_exec)
10228 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10229 else
10230 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10231 }
10232 else
10233 *cost += op1cost + op2cost;
10234 }
10235 }
10236 return true;
10237
10238 case COMPARE:
10239 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10240 *cost = 0;
10241 else
10242 {
10243 machine_mode op0mode;
10244 /* We'll mostly assume that the cost of a compare is the cost of the
10245 LHS. However, there are some notable exceptions. */
10246
10247 /* Floating point compares are never done as side-effects. */
10248 op0mode = GET_MODE (XEXP (x, 0));
10249 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10250 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10251 {
10252 if (speed_p)
10253 *cost += extra_cost->fp[op0mode != SFmode].compare;
10254
10255 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10256 {
10257 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10258 return true;
10259 }
10260
10261 return false;
10262 }
10263 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10264 {
10265 *cost = LIBCALL_COST (2);
10266 return false;
10267 }
10268
10269 /* DImode compares normally take two insns. */
10270 if (op0mode == DImode)
10271 {
10272 *cost += COSTS_N_INSNS (1);
10273 if (speed_p)
10274 *cost += 2 * extra_cost->alu.arith;
10275 return false;
10276 }
10277
10278 if (op0mode == SImode)
10279 {
10280 rtx shift_op;
10281 rtx shift_reg;
10282
10283 if (XEXP (x, 1) == const0_rtx
10284 && !(REG_P (XEXP (x, 0))
10285 || (GET_CODE (XEXP (x, 0)) == SUBREG
10286 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10287 {
10288 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10289
10290 /* Multiply operations that set the flags are often
10291 significantly more expensive. */
10292 if (speed_p
10293 && GET_CODE (XEXP (x, 0)) == MULT
10294 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10295 *cost += extra_cost->mult[0].flag_setting;
10296
10297 if (speed_p
10298 && GET_CODE (XEXP (x, 0)) == PLUS
10299 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10300 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10301 0), 1), mode))
10302 *cost += extra_cost->mult[0].flag_setting;
10303 return true;
10304 }
10305
10306 shift_reg = NULL;
10307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10308 if (shift_op != NULL)
10309 {
10310 if (shift_reg != NULL)
10311 {
10312 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10313 1, speed_p);
10314 if (speed_p)
10315 *cost += extra_cost->alu.arith_shift_reg;
10316 }
10317 else if (speed_p)
10318 *cost += extra_cost->alu.arith_shift;
10319 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10320 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10321 return true;
10322 }
10323
10324 if (speed_p)
10325 *cost += extra_cost->alu.arith;
10326 if (CONST_INT_P (XEXP (x, 1))
10327 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10328 {
10329 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10330 return true;
10331 }
10332 return false;
10333 }
10334
10335 /* Vector mode? */
10336
10337 *cost = LIBCALL_COST (2);
10338 return false;
10339 }
10340 return true;
10341
10342 case EQ:
10343 case NE:
10344 case LT:
10345 case LE:
10346 case GT:
10347 case GE:
10348 case LTU:
10349 case LEU:
10350 case GEU:
10351 case GTU:
10352 case ORDERED:
10353 case UNORDERED:
10354 case UNEQ:
10355 case UNLE:
10356 case UNLT:
10357 case UNGE:
10358 case UNGT:
10359 case LTGT:
10360 if (outer_code == SET)
10361 {
10362 /* Is it a store-flag operation? */
10363 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10364 && XEXP (x, 1) == const0_rtx)
10365 {
10366 /* Thumb also needs an IT insn. */
10367 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10368 return true;
10369 }
10370 if (XEXP (x, 1) == const0_rtx)
10371 {
10372 switch (code)
10373 {
10374 case LT:
10375 /* LSR Rd, Rn, #31. */
10376 if (speed_p)
10377 *cost += extra_cost->alu.shift;
10378 break;
10379
10380 case EQ:
10381 /* RSBS T1, Rn, #0
10382 ADC Rd, Rn, T1. */
10383
10384 case NE:
10385 /* SUBS T1, Rn, #1
10386 SBC Rd, Rn, T1. */
10387 *cost += COSTS_N_INSNS (1);
10388 break;
10389
10390 case LE:
10391 /* RSBS T1, Rn, Rn, LSR #31
10392 ADC Rd, Rn, T1. */
10393 *cost += COSTS_N_INSNS (1);
10394 if (speed_p)
10395 *cost += extra_cost->alu.arith_shift;
10396 break;
10397
10398 case GT:
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost += COSTS_N_INSNS (1);
10402 if (speed_p)
10403 *cost += (extra_cost->alu.arith_shift
10404 + extra_cost->alu.shift);
10405 break;
10406
10407 case GE:
10408 /* ASR Rd, Rn, #31
10409 ADD Rd, Rn, #1. */
10410 *cost += COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->alu.shift;
10413 break;
10414
10415 default:
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost = COSTS_N_INSNS (3);
10419 break;
10420 }
10421 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10422 return true;
10423 }
10424 else
10425 {
10426 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10427 if (CONST_INT_P (XEXP (x, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10429 {
10430 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10431 return true;
10432 }
10433
10434 return false;
10435 }
10436 }
10437 /* Not directly inside a set. If it involves the condition code
10438 register, it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere,
10440 this is just the control part, which has no additional
10441 cost. */
10442 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10443 && XEXP (x, 1) == const0_rtx)
10444 {
10445 *cost = 0;
10446 return true;
10447 }
10448 return false;
10449
10450 case ABS:
10451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10452 && (mode == SFmode || !TARGET_VFP_SINGLE))
10453 {
10454 if (speed_p)
10455 *cost += extra_cost->fp[mode != SFmode].neg;
10456
10457 return false;
10458 }
10459 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10460 {
10461 *cost = LIBCALL_COST (1);
10462 return false;
10463 }
10464
10465 if (mode == SImode)
10466 {
10467 if (speed_p)
10468 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10469 return false;
10470 }
10471 /* Vector mode? */
10472 *cost = LIBCALL_COST (1);
10473 return false;
10474
10475 case SIGN_EXTEND:
10476 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10477 && MEM_P (XEXP (x, 0)))
10478 {
10479 if (mode == DImode)
10480 *cost += COSTS_N_INSNS (1);
10481
10482 if (!speed_p)
10483 return true;
10484
10485 if (GET_MODE (XEXP (x, 0)) == SImode)
10486 *cost += extra_cost->ldst.load;
10487 else
10488 *cost += extra_cost->ldst.load_sign_extend;
10489
10490 if (mode == DImode)
10491 *cost += extra_cost->alu.shift;
10492
10493 return true;
10494 }
10495
10496 /* Widening from less than 32 bits requires an extend operation. */
10497 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10498 {
10499 /* We have SXTB/SXTH. */
10500 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10501 if (speed_p)
10502 *cost += extra_cost->alu.extend;
10503 }
10504 else if (GET_MODE (XEXP (x, 0)) != SImode)
10505 {
10506 /* Needs two shifts. */
10507 *cost += COSTS_N_INSNS (1);
10508 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10509 if (speed_p)
10510 *cost += 2 * extra_cost->alu.shift;
10511 }
10512
10513 /* Widening beyond 32 bits requires one more insn. */
10514 if (mode == DImode)
10515 {
10516 *cost += COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 }
10520
10521 return true;
10522
10523 case ZERO_EXTEND:
10524 if ((arm_arch4
10525 || GET_MODE (XEXP (x, 0)) == SImode
10526 || GET_MODE (XEXP (x, 0)) == QImode)
10527 && MEM_P (XEXP (x, 0)))
10528 {
10529 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10530
10531 if (mode == DImode)
10532 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10533
10534 return true;
10535 }
10536
10537 /* Widening from less than 32 bits requires an extend operation. */
10538 if (GET_MODE (XEXP (x, 0)) == QImode)
10539 {
10540 /* UXTB can be a shorter instruction in Thumb2, but it might
10541 be slower than the AND Rd, Rn, #255 alternative. When
10542 optimizing for speed it should never be slower to use
10543 AND, and we don't really model 16-bit vs 32-bit insns
10544 here. */
10545 if (speed_p)
10546 *cost += extra_cost->alu.logical;
10547 }
10548 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10549 {
10550 /* We have UXTB/UXTH. */
10551 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10552 if (speed_p)
10553 *cost += extra_cost->alu.extend;
10554 }
10555 else if (GET_MODE (XEXP (x, 0)) != SImode)
10556 {
10557 /* Needs two shifts. It's marginally preferable to use
10558 shifts rather than two BIC instructions as the second
10559 shift may merge with a subsequent insn as a shifter
10560 op. */
10561 *cost = COSTS_N_INSNS (2);
10562 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10563 if (speed_p)
10564 *cost += 2 * extra_cost->alu.shift;
10565 }
10566
10567 /* Widening beyond 32 bits requires one more insn. */
10568 if (mode == DImode)
10569 {
10570 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10571 }
10572
10573 return true;
10574
10575 case CONST_INT:
10576 *cost = 0;
10577 /* CONST_INT has no mode, so we cannot tell for sure how many
10578 insns are really going to be needed. The best we can do is
10579 look at the value passed. If it fits in SImode, then assume
10580 that's the mode it will be used for. Otherwise assume it
10581 will be used in DImode. */
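/* For example, (const_int 0x1ffffffff) does not survive truncation to
   SImode, so it is costed below as a DImode value built from its two
   SImode halves. */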
10582 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10583 mode = SImode;
10584 else
10585 mode = DImode;
10586
10587 /* Avoid blowing up in arm_gen_constant (). */
10588 if (!(outer_code == PLUS
10589 || outer_code == AND
10590 || outer_code == IOR
10591 || outer_code == XOR
10592 || outer_code == MINUS))
10593 outer_code = SET;
10594
10595 const_int_cost:
10596 if (mode == SImode)
10597 {
10598 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10599 INTVAL (x), NULL, NULL,
10600 0, 0));
10601 /* Extra costs? */
10602 }
10603 else
10604 {
10605 *cost += COSTS_N_INSNS (arm_gen_constant
10606 (outer_code, SImode, NULL,
10607 trunc_int_for_mode (INTVAL (x), SImode),
10608 NULL, NULL, 0, 0)
10609 + arm_gen_constant (outer_code, SImode, NULL,
10610 INTVAL (x) >> 32, NULL,
10611 NULL, 0, 0));
10612 /* Extra costs? */
10613 }
10614
10615 return true;
10616
10617 case CONST:
10618 case LABEL_REF:
10619 case SYMBOL_REF:
10620 if (speed_p)
10621 {
10622 if (arm_arch_thumb2 && !flag_pic)
10623 *cost += COSTS_N_INSNS (1);
10624 else
10625 *cost += extra_cost->ldst.load;
10626 }
10627 else
10628 *cost += COSTS_N_INSNS (1);
10629
10630 if (flag_pic)
10631 {
10632 *cost += COSTS_N_INSNS (1);
10633 if (speed_p)
10634 *cost += extra_cost->alu.arith;
10635 }
10636
10637 return true;
10638
10639 case CONST_FIXED:
10640 *cost = COSTS_N_INSNS (4);
10641 /* Fixme. */
10642 return true;
10643
10644 case CONST_DOUBLE:
10645 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10646 && (mode == SFmode || !TARGET_VFP_SINGLE))
10647 {
10648 if (vfp3_const_double_rtx (x))
10649 {
10650 if (speed_p)
10651 *cost += extra_cost->fp[mode == DFmode].fpconst;
10652 return true;
10653 }
10654
10655 if (speed_p)
10656 {
10657 if (mode == DFmode)
10658 *cost += extra_cost->ldst.loadd;
10659 else
10660 *cost += extra_cost->ldst.loadf;
10661 }
10662 else
10663 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10664
10665 return true;
10666 }
10667 *cost = COSTS_N_INSNS (4);
10668 return true;
10669
10670 case CONST_VECTOR:
10671 /* Fixme. */
10672 if (TARGET_NEON
10673 && TARGET_HARD_FLOAT
10674 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10675 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10676 *cost = COSTS_N_INSNS (1);
10677 else
10678 *cost = COSTS_N_INSNS (4);
10679 return true;
10680
10681 case HIGH:
10682 case LO_SUM:
10683 /* When optimizing for size, we prefer constant pool entries to
10684 MOVW/MOVT pairs, so bump the cost of these slightly. */
10685 if (!speed_p)
10686 *cost += 1;
10687 return true;
10688
10689 case CLZ:
10690 if (speed_p)
10691 *cost += extra_cost->alu.clz;
10692 return false;
10693
10694 case SMIN:
10695 if (XEXP (x, 1) == const0_rtx)
10696 {
10697 if (speed_p)
10698 *cost += extra_cost->alu.log_shift;
10699 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10700 return true;
10701 }
10702 /* Fall through. */
10703 case SMAX:
10704 case UMIN:
10705 case UMAX:
10706 *cost += COSTS_N_INSNS (1);
10707 return false;
10708
10709 case TRUNCATE:
10710 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10711 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10712 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10713 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10714 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10715 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10716 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10717 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10718 == ZERO_EXTEND))))
10719 {
10720 if (speed_p)
10721 *cost += extra_cost->mult[1].extend;
10722 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10723 ZERO_EXTEND, 0, speed_p)
10724 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10725 ZERO_EXTEND, 0, speed_p));
10726 return true;
10727 }
10728 *cost = LIBCALL_COST (1);
10729 return false;
10730
10731 case UNSPEC_VOLATILE:
10732 case UNSPEC:
10733 return arm_unspec_cost (x, outer_code, speed_p, cost);
10734
10735 case PC:
10736 /* Reading the PC is like reading any other register. Writing it
10737 is more expensive, but we take that into account elsewhere. */
10738 *cost = 0;
10739 return true;
10740
10741 case ZERO_EXTRACT:
10742 /* TODO: Simple zero_extract of bottom bits using AND. */
10743 /* Fall through. */
10744 case SIGN_EXTRACT:
10745 if (arm_arch6
10746 && mode == SImode
10747 && CONST_INT_P (XEXP (x, 1))
10748 && CONST_INT_P (XEXP (x, 2)))
10749 {
10750 if (speed_p)
10751 *cost += extra_cost->alu.bfx;
10752 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10753 return true;
10754 }
10755 /* Without UBFX/SBFX, need to resort to shift operations. */
10756 *cost += COSTS_N_INSNS (1);
10757 if (speed_p)
10758 *cost += 2 * extra_cost->alu.shift;
10759 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10760 return true;
10761
10762 case FLOAT_EXTEND:
10763 if (TARGET_HARD_FLOAT)
10764 {
10765 if (speed_p)
10766 *cost += extra_cost->fp[mode == DFmode].widen;
10767 if (!TARGET_VFP5
10768 && GET_MODE (XEXP (x, 0)) == HFmode)
10769 {
10770 /* Pre v8, widening HF->DF is a two-step process, first
10771 widening to SFmode. */
10772 *cost += COSTS_N_INSNS (1);
10773 if (speed_p)
10774 *cost += extra_cost->fp[0].widen;
10775 }
10776 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10777 return true;
10778 }
10779
10780 *cost = LIBCALL_COST (1);
10781 return false;
10782
10783 case FLOAT_TRUNCATE:
10784 if (TARGET_HARD_FLOAT)
10785 {
10786 if (speed_p)
10787 *cost += extra_cost->fp[mode == DFmode].narrow;
10788 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10789 return true;
10790 /* Vector modes? */
10791 }
10792 *cost = LIBCALL_COST (1);
10793 return false;
10794
10795 case FMA:
10796 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10797 {
10798 rtx op0 = XEXP (x, 0);
10799 rtx op1 = XEXP (x, 1);
10800 rtx op2 = XEXP (x, 2);
10801
10802
10803 /* vfms or vfnma. */
10804 if (GET_CODE (op0) == NEG)
10805 op0 = XEXP (op0, 0);
10806
10807 /* vfnms or vfnma. */
10808 if (GET_CODE (op2) == NEG)
10809 op2 = XEXP (op2, 0);
10810
10811 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10812 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10813 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10814
10815 if (speed_p)
10816 *cost += extra_cost->fp[mode == DFmode].fma;
10817
10818 return true;
10819 }
10820
10821 *cost = LIBCALL_COST (3);
10822 return false;
10823
10824 case FIX:
10825 case UNSIGNED_FIX:
10826 if (TARGET_HARD_FLOAT)
10827 {
10828 /* The *combine_vcvtf2i pattern reduces a vmul+vcvt into
10829 a vcvt fixed-point conversion. */
10830 if (code == FIX && mode == SImode
10831 && GET_CODE (XEXP (x, 0)) == FIX
10832 && GET_MODE (XEXP (x, 0)) == SFmode
10833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10834 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10835 > 0)
10836 {
10837 if (speed_p)
10838 *cost += extra_cost->fp[0].toint;
10839
10840 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10841 code, 0, speed_p);
10842 return true;
10843 }
10844
10845 if (GET_MODE_CLASS (mode) == MODE_INT)
10846 {
10847 mode = GET_MODE (XEXP (x, 0));
10848 if (speed_p)
10849 *cost += extra_cost->fp[mode == DFmode].toint;
10850 /* Strip off the 'cost' of rounding towards zero. */
10851 if (GET_CODE (XEXP (x, 0)) == FIX)
10852 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10853 0, speed_p);
10854 else
10855 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10858 return true;
10859 }
10860 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10861 && TARGET_VFP5)
10862 {
10863 if (speed_p)
10864 *cost += extra_cost->fp[mode == DFmode].roundint;
10865 return false;
10866 }
10867 /* Vector costs? */
10868 }
10869 *cost = LIBCALL_COST (1);
10870 return false;
10871
10872 case FLOAT:
10873 case UNSIGNED_FLOAT:
10874 if (TARGET_HARD_FLOAT)
10875 {
10876 /* ??? Increase the cost to deal with transferring from CORE
10877 -> FP registers? */
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode == DFmode].fromint;
10880 return false;
10881 }
10882 *cost = LIBCALL_COST (1);
10883 return false;
10884
10885 case CALL:
10886 return true;
10887
10888 case ASM_OPERANDS:
10889 {
10890 /* Just a guess: count the instructions in the asm template
10891 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10892 though (see PR60663). */
10893 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10894 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10895
10896 *cost = COSTS_N_INSNS (asm_length + num_operands);
10897 return true;
10898 }
10899 default:
10900 if (mode != VOIDmode)
10901 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10902 else
10903 *cost = COSTS_N_INSNS (4); /* Who knows? */
10904 return false;
10905 }
10906 }
10907
10908 #undef HANDLE_NARROW_SHIFT_ARITH
10909
10910 /* RTX costs entry point. */
10911
10912 static bool
10913 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10914 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10915 {
10916 bool result;
10917 int code = GET_CODE (x);
10918 gcc_assert (current_tune->insn_extra_cost);
10919
10920 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10921 (enum rtx_code) outer_code,
10922 current_tune->insn_extra_cost,
10923 total, speed);
10924
10925 if (dump_file && (dump_flags & TDF_DETAILS))
10926 {
10927 print_rtl_single (dump_file, x);
10928 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10929 *total, result ? "final" : "partial");
10930 }
10931 return result;
10932 }
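/* As an illustration of the dump output above: with TDF_DETAILS dumping
   enabled, each rtx that is costed is printed followed by a line such as
   "Hot cost: 8 (final)" or "Cold cost: 4 (partial)"; the numeric values
   shown here are only examples. */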
10933
10934 /* All address computations that can be done are free, but rtx cost returns
10935 the same for practically all of them. So we weight the different types
10936 of address here in the order (most pref first):
10937 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10938 static inline int
10939 arm_arm_address_cost (rtx x)
10940 {
10941 enum rtx_code c = GET_CODE (x);
10942
10943 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10944 return 0;
10945 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10946 return 10;
10947
10948 if (c == PLUS)
10949 {
10950 if (CONST_INT_P (XEXP (x, 1)))
10951 return 2;
10952
10953 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10954 return 3;
10955
10956 return 4;
10957 }
10958
10959 return 6;
10960 }
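/* For illustration, the weighting above yields, for example:
     (post_inc (reg))                        -> 0
     (plus (reg) (const_int 8))              -> 2
     (plus (reg) (mult (reg) (const_int 4))) -> 3
     (plus (reg) (reg))                      -> 4
     (reg)                                   -> 6
     (mem), (label_ref), (symbol_ref)        -> 10
   Only the shape of the rtx matters, not the particular registers. */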
10961
10962 static inline int
10963 arm_thumb_address_cost (rtx x)
10964 {
10965 enum rtx_code c = GET_CODE (x);
10966
10967 if (c == REG)
10968 return 1;
10969 if (c == PLUS
10970 && REG_P (XEXP (x, 0))
10971 && CONST_INT_P (XEXP (x, 1)))
10972 return 1;
10973
10974 return 2;
10975 }
10976
10977 static int
10978 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10979 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10980 {
10981 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10982 }
10983
10984 /* Adjust cost hook for XScale. */
10985 static bool
10986 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10987 int * cost)
10988 {
10989 /* Some true dependencies can have a higher cost depending
10990 on precisely how certain input operands are used. */
10991 if (dep_type == 0
10992 && recog_memoized (insn) >= 0
10993 && recog_memoized (dep) >= 0)
10994 {
10995 int shift_opnum = get_attr_shift (insn);
10996 enum attr_type attr_type = get_attr_type (dep);
10997
10998 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10999 operand for INSN. If we have a shifted input operand and the
11000 instruction we depend on is another ALU instruction, then we may
11001 have to account for an additional stall. */
11002 if (shift_opnum != 0
11003 && (attr_type == TYPE_ALU_SHIFT_IMM
11004 || attr_type == TYPE_ALUS_SHIFT_IMM
11005 || attr_type == TYPE_LOGIC_SHIFT_IMM
11006 || attr_type == TYPE_LOGICS_SHIFT_IMM
11007 || attr_type == TYPE_ALU_SHIFT_REG
11008 || attr_type == TYPE_ALUS_SHIFT_REG
11009 || attr_type == TYPE_LOGIC_SHIFT_REG
11010 || attr_type == TYPE_LOGICS_SHIFT_REG
11011 || attr_type == TYPE_MOV_SHIFT
11012 || attr_type == TYPE_MVN_SHIFT
11013 || attr_type == TYPE_MOV_SHIFT_REG
11014 || attr_type == TYPE_MVN_SHIFT_REG))
11015 {
11016 rtx shifted_operand;
11017 int opno;
11018
11019 /* Get the shifted operand. */
11020 extract_insn (insn);
11021 shifted_operand = recog_data.operand[shift_opnum];
11022
11023 /* Iterate over all the operands in DEP. If we write an operand
11024 that overlaps with SHIFTED_OPERAND, then we have to increase the
11025 cost of this dependency. */
11026 extract_insn (dep);
11027 preprocess_constraints (dep);
11028 for (opno = 0; opno < recog_data.n_operands; opno++)
11029 {
11030 /* We can ignore strict inputs. */
11031 if (recog_data.operand_type[opno] == OP_IN)
11032 continue;
11033
11034 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11035 shifted_operand))
11036 {
11037 *cost = 2;
11038 return false;
11039 }
11040 }
11041 }
11042 }
11043 return true;
11044 }
11045
11046 /* Adjust cost hook for Cortex A9. */
11047 static bool
11048 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11049 int * cost)
11050 {
11051 switch (dep_type)
11052 {
11053 case REG_DEP_ANTI:
11054 *cost = 0;
11055 return false;
11056
11057 case REG_DEP_TRUE:
11058 case REG_DEP_OUTPUT:
11059 if (recog_memoized (insn) >= 0
11060 && recog_memoized (dep) >= 0)
11061 {
11062 if (GET_CODE (PATTERN (insn)) == SET)
11063 {
11064 if (GET_MODE_CLASS
11065 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11066 || GET_MODE_CLASS
11067 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11068 {
11069 enum attr_type attr_type_insn = get_attr_type (insn);
11070 enum attr_type attr_type_dep = get_attr_type (dep);
11071
11072 /* By default all dependencies of the form
11073 s0 = s0 <op> s1
11074 s0 = s0 <op> s2
11075 have an extra latency of 1 cycle because
11076 of the input and output dependency in this
11077 case. However this gets modeled as a true
11078 dependency and hence all these checks. */
11079 if (REG_P (SET_DEST (PATTERN (insn)))
11080 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11081 {
11082 /* FMACS is a special case where the dependent
11083 instruction can be issued 3 cycles before
11084 the normal latency in case of an output
11085 dependency. */
11086 if ((attr_type_insn == TYPE_FMACS
11087 || attr_type_insn == TYPE_FMACD)
11088 && (attr_type_dep == TYPE_FMACS
11089 || attr_type_dep == TYPE_FMACD))
11090 {
11091 if (dep_type == REG_DEP_OUTPUT)
11092 *cost = insn_default_latency (dep) - 3;
11093 else
11094 *cost = insn_default_latency (dep);
11095 return false;
11096 }
11097 else
11098 {
11099 if (dep_type == REG_DEP_OUTPUT)
11100 *cost = insn_default_latency (dep) + 1;
11101 else
11102 *cost = insn_default_latency (dep);
11103 }
11104 return false;
11105 }
11106 }
11107 }
11108 }
11109 break;
11110
11111 default:
11112 gcc_unreachable ();
11113 }
11114
11115 return true;
11116 }
11117
11118 /* Adjust cost hook for FA726TE. */
11119 static bool
11120 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11121 int * cost)
11122 {
11123 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11124 has a penalty of 3. */
11125 if (dep_type == REG_DEP_TRUE
11126 && recog_memoized (insn) >= 0
11127 && recog_memoized (dep) >= 0
11128 && get_attr_conds (dep) == CONDS_SET)
11129 {
11130 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11131 if (get_attr_conds (insn) == CONDS_USE
11132 && get_attr_type (insn) != TYPE_BRANCH)
11133 {
11134 *cost = 3;
11135 return false;
11136 }
11137
11138 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11139 || get_attr_conds (insn) == CONDS_USE)
11140 {
11141 *cost = 0;
11142 return false;
11143 }
11144 }
11145
11146 return true;
11147 }
11148
11149 /* Implement TARGET_REGISTER_MOVE_COST.
11150
11151 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11152 such a move is typically more expensive than a single memory access. We set
11153 the cost to less than two memory accesses so that floating
11154 point to integer conversion does not go through memory. */
11155
11156 int
11157 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11158 reg_class_t from, reg_class_t to)
11159 {
11160 if (TARGET_32BIT)
11161 {
11162 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11163 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11164 return 15;
11165 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11166 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11167 return 4;
11168 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11169 return 20;
11170 else
11171 return 2;
11172 }
11173 else
11174 {
11175 if (from == HI_REGS || to == HI_REGS)
11176 return 4;
11177 else
11178 return 2;
11179 }
11180 }
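/* For example, on a 32-bit target a move between VFP_REGS and GENERAL_REGS
   is costed at 15: more than one memory access (arm_memory_move_cost
   returns 10 for 32-bit targets) but less than two, so float-to-integer
   conversions keep their values in registers rather than spilling. */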
11181
11182 /* Implement TARGET_MEMORY_MOVE_COST. */
11183
11184 int
11185 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11186 bool in ATTRIBUTE_UNUSED)
11187 {
11188 if (TARGET_32BIT)
11189 return 10;
11190 else
11191 {
11192 if (GET_MODE_SIZE (mode) < 4)
11193 return 8;
11194 else
11195 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11196 }
11197 }
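/* For example: all 32-bit targets return 10; on Thumb-1, a QImode or
   HImode value costs 8, SImode in LO_REGS costs 2 * 4 * 1 = 8, and SImode
   in HI_REGS costs 2 * 4 * 2 = 16. */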
11198
11199 /* Vectorizer cost model implementation. */
11200
11201 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11202 static int
11203 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11204 tree vectype,
11205 int misalign ATTRIBUTE_UNUSED)
11206 {
11207 unsigned elements;
11208
11209 switch (type_of_cost)
11210 {
11211 case scalar_stmt:
11212 return current_tune->vec_costs->scalar_stmt_cost;
11213
11214 case scalar_load:
11215 return current_tune->vec_costs->scalar_load_cost;
11216
11217 case scalar_store:
11218 return current_tune->vec_costs->scalar_store_cost;
11219
11220 case vector_stmt:
11221 return current_tune->vec_costs->vec_stmt_cost;
11222
11223 case vector_load:
11224 return current_tune->vec_costs->vec_align_load_cost;
11225
11226 case vector_store:
11227 return current_tune->vec_costs->vec_store_cost;
11228
11229 case vec_to_scalar:
11230 return current_tune->vec_costs->vec_to_scalar_cost;
11231
11232 case scalar_to_vec:
11233 return current_tune->vec_costs->scalar_to_vec_cost;
11234
11235 case unaligned_load:
11236 return current_tune->vec_costs->vec_unalign_load_cost;
11237
11238 case unaligned_store:
11239 return current_tune->vec_costs->vec_unalign_store_cost;
11240
11241 case cond_branch_taken:
11242 return current_tune->vec_costs->cond_taken_branch_cost;
11243
11244 case cond_branch_not_taken:
11245 return current_tune->vec_costs->cond_not_taken_branch_cost;
11246
11247 case vec_perm:
11248 case vec_promote_demote:
11249 return current_tune->vec_costs->vec_stmt_cost;
11250
11251 case vec_construct:
11252 elements = TYPE_VECTOR_SUBPARTS (vectype);
11253 return elements / 2 + 1;
11254
11255 default:
11256 gcc_unreachable ();
11257 }
11258 }
11259
11260 /* Implement targetm.vectorize.add_stmt_cost. */
11261
11262 static unsigned
11263 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11264 struct _stmt_vec_info *stmt_info, int misalign,
11265 enum vect_cost_model_location where)
11266 {
11267 unsigned *cost = (unsigned *) data;
11268 unsigned retval = 0;
11269
11270 if (flag_vect_cost_model)
11271 {
11272 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11273 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11274
11275 /* Statements in an inner loop relative to the loop being
11276 vectorized are weighted more heavily. The value here is
11277 arbitrary and could potentially be improved with analysis. */
11278 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11279 count *= 50; /* FIXME. */
11280
11281 retval = (unsigned) (count * stmt_cost);
11282 cost[where] += retval;
11283 }
11284
11285 return retval;
11286 }
11287
11288 /* Return true if and only if this insn can dual-issue only as older. */
11289 static bool
11290 cortexa7_older_only (rtx_insn *insn)
11291 {
11292 if (recog_memoized (insn) < 0)
11293 return false;
11294
11295 switch (get_attr_type (insn))
11296 {
11297 case TYPE_ALU_DSP_REG:
11298 case TYPE_ALU_SREG:
11299 case TYPE_ALUS_SREG:
11300 case TYPE_LOGIC_REG:
11301 case TYPE_LOGICS_REG:
11302 case TYPE_ADC_REG:
11303 case TYPE_ADCS_REG:
11304 case TYPE_ADR:
11305 case TYPE_BFM:
11306 case TYPE_REV:
11307 case TYPE_MVN_REG:
11308 case TYPE_SHIFT_IMM:
11309 case TYPE_SHIFT_REG:
11310 case TYPE_LOAD_BYTE:
11311 case TYPE_LOAD1:
11312 case TYPE_STORE1:
11313 case TYPE_FFARITHS:
11314 case TYPE_FADDS:
11315 case TYPE_FFARITHD:
11316 case TYPE_FADDD:
11317 case TYPE_FMOV:
11318 case TYPE_F_CVT:
11319 case TYPE_FCMPS:
11320 case TYPE_FCMPD:
11321 case TYPE_FCONSTS:
11322 case TYPE_FCONSTD:
11323 case TYPE_FMULS:
11324 case TYPE_FMACS:
11325 case TYPE_FMULD:
11326 case TYPE_FMACD:
11327 case TYPE_FDIVS:
11328 case TYPE_FDIVD:
11329 case TYPE_F_MRC:
11330 case TYPE_F_MRRC:
11331 case TYPE_F_FLAG:
11332 case TYPE_F_LOADS:
11333 case TYPE_F_STORES:
11334 return true;
11335 default:
11336 return false;
11337 }
11338 }
11339
11340 /* Return true if and only if this insn can dual-issue as younger. */
11341 static bool
11342 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11343 {
11344 if (recog_memoized (insn) < 0)
11345 {
11346 if (verbose > 5)
11347 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11348 return false;
11349 }
11350
11351 switch (get_attr_type (insn))
11352 {
11353 case TYPE_ALU_IMM:
11354 case TYPE_ALUS_IMM:
11355 case TYPE_LOGIC_IMM:
11356 case TYPE_LOGICS_IMM:
11357 case TYPE_EXTEND:
11358 case TYPE_MVN_IMM:
11359 case TYPE_MOV_IMM:
11360 case TYPE_MOV_REG:
11361 case TYPE_MOV_SHIFT:
11362 case TYPE_MOV_SHIFT_REG:
11363 case TYPE_BRANCH:
11364 case TYPE_CALL:
11365 return true;
11366 default:
11367 return false;
11368 }
11369 }
11370
11371
11372 /* Look for an instruction that can dual issue only as an older
11373 instruction, and move it in front of any instructions that can
11374 dual-issue as younger, while preserving the relative order of all
11375 other instructions in the ready list. This is a heuristic to help
11376 dual-issue in later cycles, by postponing issue of more flexible
11377 instructions. This heuristic may affect dual issue opportunities
11378 in the current cycle. */
11379 static void
11380 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11381 int *n_readyp, int clock)
11382 {
11383 int i;
11384 int first_older_only = -1, first_younger = -1;
11385
11386 if (verbose > 5)
11387 fprintf (file,
11388 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11389 clock,
11390 *n_readyp);
11391
11392 /* Traverse the ready list from the head (the instruction to issue
11393 first), looking for the first instruction that can issue as
11394 younger and the first instruction that can dual-issue only as
11395 older. */
11396 for (i = *n_readyp - 1; i >= 0; i--)
11397 {
11398 rtx_insn *insn = ready[i];
11399 if (cortexa7_older_only (insn))
11400 {
11401 first_older_only = i;
11402 if (verbose > 5)
11403 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11404 break;
11405 }
11406 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11407 first_younger = i;
11408 }
11409
11410 /* Nothing to reorder because either no younger insn found or insn
11411 that can dual-issue only as older appears before any insn that
11412 can dual-issue as younger. */
11413 if (first_younger == -1)
11414 {
11415 if (verbose > 5)
11416 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11417 return;
11418 }
11419
11420 /* Nothing to reorder because no older-only insn in the ready list. */
11421 if (first_older_only == -1)
11422 {
11423 if (verbose > 5)
11424 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11425 return;
11426 }
11427
11428 /* Move first_older_only insn before first_younger. */
11429 if (verbose > 5)
11430 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11431 INSN_UID (ready[first_older_only]),
11432 INSN_UID (ready[first_younger]));
11433 rtx_insn *first_older_only_insn = ready[first_older_only];
11434 for (i = first_older_only; i < first_younger; i++)
11435 {
11436 ready[i] = ready[i+1];
11437 }
11438
11439 ready[i] = first_older_only_insn;
11440 return;
11441 }
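/* For example, if the ready list is { I0, OLD, Y1, Y0 } (head at the
   highest index, so Y0 would issue first), where OLD is older-only and
   Y0/Y1 are younger, the scan finds first_younger = 3 and
   first_older_only = 1, and the rotation produces { I0, Y1, Y0, OLD }:
   OLD now issues first while Y0 and Y1 keep their relative order. */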
11442
11443 /* Implement TARGET_SCHED_REORDER. */
11444 static int
11445 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11446 int clock)
11447 {
11448 switch (arm_tune)
11449 {
11450 case TARGET_CPU_cortexa7:
11451 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11452 break;
11453 default:
11454 /* Do nothing for other cores. */
11455 break;
11456 }
11457
11458 return arm_issue_rate ();
11459 }
11460
11461 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11462 It corrects the value of COST based on the relationship between
11463 INSN and DEP through the dependence LINK. It returns the new
11464 value. There is a per-core adjust_cost hook to adjust scheduler costs
11465 and the per-core hook can choose to completely override the generic
11466 adjust_cost function. Only put bits of code into arm_adjust_cost that
11467 are common across all cores. */
11468 static int
11469 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11470 unsigned int)
11471 {
11472 rtx i_pat, d_pat;
11473
11474 /* When generating Thumb-1 code, we want to place flag-setting operations
11475 close to a conditional branch which depends on them, so that we can
11476 omit the comparison. */
11477 if (TARGET_THUMB1
11478 && dep_type == 0
11479 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11480 && recog_memoized (dep) >= 0
11481 && get_attr_conds (dep) == CONDS_SET)
11482 return 0;
11483
11484 if (current_tune->sched_adjust_cost != NULL)
11485 {
11486 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11487 return cost;
11488 }
11489
11490 /* XXX Is this strictly true? */
11491 if (dep_type == REG_DEP_ANTI
11492 || dep_type == REG_DEP_OUTPUT)
11493 return 0;
11494
11495 /* Call insns don't incur a stall, even if they follow a load. */
11496 if (dep_type == 0
11497 && CALL_P (insn))
11498 return 1;
11499
11500 if ((i_pat = single_set (insn)) != NULL
11501 && MEM_P (SET_SRC (i_pat))
11502 && (d_pat = single_set (dep)) != NULL
11503 && MEM_P (SET_DEST (d_pat)))
11504 {
11505 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11506 /* This is a load after a store, there is no conflict if the load reads
11507 from a cached area. Assume that loads from the stack, and from the
11508 constant pool are cached, and that others will miss. This is a
11509 hack. */
11510
11511 if ((GET_CODE (src_mem) == SYMBOL_REF
11512 && CONSTANT_POOL_ADDRESS_P (src_mem))
11513 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11514 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11515 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11516 return 1;
11517 }
11518
11519 return cost;
11520 }
11521
11522 int
11523 arm_max_conditional_execute (void)
11524 {
11525 return max_insns_skipped;
11526 }
11527
11528 static int
11529 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11530 {
11531 if (TARGET_32BIT)
11532 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11533 else
11534 return (optimize > 0) ? 2 : 0;
11535 }
11536
11537 static int
11538 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11539 {
11540 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11541 }
11542
11543 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11544 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11545 sequences of non-executed instructions in IT blocks probably take the same
11546 amount of time as executed instructions (and the IT instruction itself takes
11547 space in icache). This function was experimentally determined to give good
11548 results on a popular embedded benchmark. */
11549
11550 static int
11551 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11552 {
11553 return (TARGET_32BIT && speed_p) ? 1
11554 : arm_default_branch_cost (speed_p, predictable_p);
11555 }
11556
11557 static int
11558 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11559 {
11560 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11561 }
11562
11563 static bool fp_consts_inited = false;
11564
11565 static REAL_VALUE_TYPE value_fp0;
11566
11567 static void
11568 init_fp_table (void)
11569 {
11570 REAL_VALUE_TYPE r;
11571
11572 r = REAL_VALUE_ATOF ("0", DFmode);
11573 value_fp0 = r;
11574 fp_consts_inited = true;
11575 }
11576
11577 /* Return TRUE if rtx X is a valid immediate FP constant. */
11578 int
11579 arm_const_double_rtx (rtx x)
11580 {
11581 const REAL_VALUE_TYPE *r;
11582
11583 if (!fp_consts_inited)
11584 init_fp_table ();
11585
11586 r = CONST_DOUBLE_REAL_VALUE (x);
11587 if (REAL_VALUE_MINUS_ZERO (*r))
11588 return 0;
11589
11590 if (real_equal (r, &value_fp0))
11591 return 1;
11592
11593 return 0;
11594 }
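/* With the table above this accepts only +0.0: init_fp_table records just
   0.0, and minus zero is rejected explicitly, so every other CONST_DOUBLE
   returns 0. */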
11595
11596 /* VFPv3 has a fairly wide range of representable immediates, formed from
11597 "quarter-precision" floating-point values. These can be evaluated using this
11598 formula (with ^ for exponentiation):
11599
11600 (-1)^s * n * 2^-r
11601
11602 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11603 16 <= n <= 31 and 0 <= r <= 7.
11604
11605 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11606
11607 - A (most-significant) is the sign bit.
11608 - BCD are the exponent (encoded as r XOR 3).
11609 - EFGH are the mantissa (encoded as n - 16).
11610 */
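/* A worked example of this encoding (value chosen for illustration):
   0.5 = (-1)^0 * 16 * 2^-5, i.e. s = 0, n = 16, r = 5, giving
   ABCDEFGH = 0 : (5 XOR 3 = 110) : (16 - 16 = 0000) = 0x60, which is the
   index returned by vfp3_const_double_index below. */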
11611
11612 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11613 fconst[sd] instruction, or -1 if X isn't suitable. */
11614 static int
11615 vfp3_const_double_index (rtx x)
11616 {
11617 REAL_VALUE_TYPE r, m;
11618 int sign, exponent;
11619 unsigned HOST_WIDE_INT mantissa, mant_hi;
11620 unsigned HOST_WIDE_INT mask;
11621 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11622 bool fail;
11623
11624 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11625 return -1;
11626
11627 r = *CONST_DOUBLE_REAL_VALUE (x);
11628
11629 /* We can't represent these things, so detect them first. */
11630 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11631 return -1;
11632
11633 /* Extract sign, exponent and mantissa. */
11634 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11635 r = real_value_abs (&r);
11636 exponent = REAL_EXP (&r);
11637 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11638 highest (sign) bit, with a fixed binary point at bit point_pos.
11639 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11640 bits for the mantissa, this may fail (low bits would be lost). */
11641 real_ldexp (&m, &r, point_pos - exponent);
11642 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11643 mantissa = w.elt (0);
11644 mant_hi = w.elt (1);
11645
11646 /* If there are bits set in the low part of the mantissa, we can't
11647 represent this value. */
11648 if (mantissa != 0)
11649 return -1;
11650
11651 /* Now make it so that mantissa contains the most-significant bits, and move
11652 the point_pos to indicate that the least-significant bits have been
11653 discarded. */
11654 point_pos -= HOST_BITS_PER_WIDE_INT;
11655 mantissa = mant_hi;
11656
11657 /* We can permit four significant bits of mantissa only, plus a high bit
11658 which is always 1. */
11659 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11660 if ((mantissa & mask) != 0)
11661 return -1;
11662
11663 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11664 mantissa >>= point_pos - 5;
11665
11666 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11667 floating-point immediate zero with Neon using an integer-zero load, but
11668 that case is handled elsewhere.) */
11669 if (mantissa == 0)
11670 return -1;
11671
11672 gcc_assert (mantissa >= 16 && mantissa <= 31);
11673
11674 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11675 normalized significands are in the range [1, 2). (Our mantissa is shifted
11676 left 4 places at this point relative to normalized IEEE754 values). GCC
11677 internally uses [0.5, 1) (see real.c), so the exponent returned from
11678 REAL_EXP must be altered. */
11679 exponent = 5 - exponent;
11680
11681 if (exponent < 0 || exponent > 7)
11682 return -1;
11683
11684 /* Sign, mantissa and exponent are now in the correct form to plug into the
11685 formula described in the comment above. */
11686 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11687 }
11688
11689 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11690 int
11691 vfp3_const_double_rtx (rtx x)
11692 {
11693 if (!TARGET_VFP3)
11694 return 0;
11695
11696 return vfp3_const_double_index (x) != -1;
11697 }
11698
11699 /* Recognize immediates which can be used in various Neon instructions. Legal
11700 immediates are described by the following table (for VMVN variants, the
11701 bitwise inverse of the constant shown is recognized. In either case, VMOV
11702 is output and the correct instruction to use for a given constant is chosen
11703 by the assembler). The constant shown is replicated across all elements of
11704 the destination vector.
11705
11706 insn elems variant constant (binary)
11707 ---- ----- ------- -----------------
11708 vmov i32 0 00000000 00000000 00000000 abcdefgh
11709 vmov i32 1 00000000 00000000 abcdefgh 00000000
11710 vmov i32 2 00000000 abcdefgh 00000000 00000000
11711 vmov i32 3 abcdefgh 00000000 00000000 00000000
11712 vmov i16 4 00000000 abcdefgh
11713 vmov i16 5 abcdefgh 00000000
11714 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11715 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11716 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11717 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11718 vmvn i16 10 00000000 abcdefgh
11719 vmvn i16 11 abcdefgh 00000000
11720 vmov i32 12 00000000 00000000 abcdefgh 11111111
11721 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11722 vmov i32 14 00000000 abcdefgh 11111111 11111111
11723 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11724 vmov i8 16 abcdefgh
11725 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11726 eeeeeeee ffffffff gggggggg hhhhhhhh
11727 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11728 vmov f32 19 00000000 00000000 00000000 00000000
11729
11730 For case 18, B = !b. Representable values are exactly those accepted by
11731 vfp3_const_double_index, but are output as floating-point numbers rather
11732 than indices.
11733
11734 For case 19, we will change it to vmov.i32 when assembling.
11735
11736 Variants 0-5 (inclusive) may also be used as immediates for the second
11737 operand of VORR/VBIC instructions.
11738
11739 The INVERSE argument causes the bitwise inverse of the given operand to be
11740 recognized instead (used for recognizing legal immediates for the VAND/VORN
11741 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11742 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11743 output, rather than the real insns vbic/vorr).
11744
11745 INVERSE makes no difference to the recognition of float vectors.
11746
11747 The return value is the variant of immediate as shown in the above table, or
11748 -1 if the given value doesn't match any of the listed patterns.
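
   As an illustration: a V4SImode constant whose elements are all
   0x0000ab00 matches variant 1 above (byte 1 of each 32-bit element holds
   abcdefgh = 0xab, all other bytes are zero); *MODCONST is set to
   (const_int 0x0000ab00) and *ELEMENTWIDTH to 32. The value is chosen
   purely for illustration.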
11749 */
11750 static int
11751 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11752 rtx *modconst, int *elementwidth)
11753 {
11754 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11755 matches = 1; \
11756 for (i = 0; i < idx; i += (STRIDE)) \
11757 if (!(TEST)) \
11758 matches = 0; \
11759 if (matches) \
11760 { \
11761 immtype = (CLASS); \
11762 elsize = (ELSIZE); \
11763 break; \
11764 }
11765
11766 unsigned int i, elsize = 0, idx = 0, n_elts;
11767 unsigned int innersize;
11768 unsigned char bytes[16];
11769 int immtype = -1, matches;
11770 unsigned int invmask = inverse ? 0xff : 0;
11771 bool vector = GET_CODE (op) == CONST_VECTOR;
11772
11773 if (vector)
11774 n_elts = CONST_VECTOR_NUNITS (op);
11775 else
11776 {
11777 n_elts = 1;
11778 if (mode == VOIDmode)
11779 mode = DImode;
11780 }
11781
11782 innersize = GET_MODE_UNIT_SIZE (mode);
11783
11784 /* Vectors of float constants. */
11785 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11786 {
11787 rtx el0 = CONST_VECTOR_ELT (op, 0);
11788
11789 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11790 return -1;
11791
11792 /* FP16 vectors cannot be represented. */
11793 if (GET_MODE_INNER (mode) == HFmode)
11794 return -1;
11795
11796 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11797 are distinct in this context. */
11798 if (!const_vec_duplicate_p (op))
11799 return -1;
11800
11801 if (modconst)
11802 *modconst = CONST_VECTOR_ELT (op, 0);
11803
11804 if (elementwidth)
11805 *elementwidth = 0;
11806
11807 if (el0 == CONST0_RTX (GET_MODE (el0)))
11808 return 19;
11809 else
11810 return 18;
11811 }
11812
11813 /* The tricks done in the code below apply for little-endian vector layout.
11814 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11815 FIXME: Implement logic for big-endian vectors. */
11816 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11817 return -1;
11818
11819 /* Splat vector constant out into a byte vector. */
11820 for (i = 0; i < n_elts; i++)
11821 {
11822 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11823 unsigned HOST_WIDE_INT elpart;
11824
11825 gcc_assert (CONST_INT_P (el));
11826 elpart = INTVAL (el);
11827
11828 for (unsigned int byte = 0; byte < innersize; byte++)
11829 {
11830 bytes[idx++] = (elpart & 0xff) ^ invmask;
11831 elpart >>= BITS_PER_UNIT;
11832 }
11833 }
11834
11835 /* Sanity check. */
11836 gcc_assert (idx == GET_MODE_SIZE (mode));
11837
11838 do
11839 {
11840 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11841 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11842
11843 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11844 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11845
11846 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11847 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11848
11849 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11850 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11851
11852 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11853
11854 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11855
11856 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11857 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11858
11859 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11860 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11861
11862 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11863 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11864
11865 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11866 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11867
11868 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11869
11870 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11871
11872 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11873 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11874
11875 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11876 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11877
11878 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11879 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11880
11881 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11882 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11883
11884 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11885
11886 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11887 && bytes[i] == bytes[(i + 8) % idx]);
11888 }
11889 while (0);
11890
11891 if (immtype == -1)
11892 return -1;
11893
11894 if (elementwidth)
11895 *elementwidth = elsize;
11896
11897 if (modconst)
11898 {
11899 unsigned HOST_WIDE_INT imm = 0;
11900
11901 /* Un-invert bytes of recognized vector, if necessary. */
11902 if (invmask != 0)
11903 for (i = 0; i < idx; i++)
11904 bytes[i] ^= invmask;
11905
11906 if (immtype == 17)
11907 {
11908 /* FIXME: Broken on 32-bit H_W_I hosts. */
11909 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11910
11911 for (i = 0; i < 8; i++)
11912 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11913 << (i * BITS_PER_UNIT);
11914
11915 *modconst = GEN_INT (imm);
11916 }
11917 else
11918 {
11919 unsigned HOST_WIDE_INT imm = 0;
11920
11921 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11922 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11923
11924 *modconst = GEN_INT (imm);
11925 }
11926 }
11927
11928 return immtype;
11929 #undef CHECK
11930 }
11931
11932 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11933 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11934 float elements), and a modified constant (whatever should be output for a
11935 VMOV) in *MODCONST. */
11936
11937 int
11938 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11939 rtx *modconst, int *elementwidth)
11940 {
11941 rtx tmpconst;
11942 int tmpwidth;
11943 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11944
11945 if (retval == -1)
11946 return 0;
11947
11948 if (modconst)
11949 *modconst = tmpconst;
11950
11951 if (elementwidth)
11952 *elementwidth = tmpwidth;
11953
11954 return 1;
11955 }
11956
11957 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11958 the immediate is valid, write a constant suitable for using as an operand
11959 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11960 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11961
11962 int
11963 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11964 rtx *modconst, int *elementwidth)
11965 {
11966 rtx tmpconst;
11967 int tmpwidth;
11968 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11969
11970 if (retval < 0 || retval > 5)
11971 return 0;
11972
11973 if (modconst)
11974 *modconst = tmpconst;
11975
11976 if (elementwidth)
11977 *elementwidth = tmpwidth;
11978
11979 return 1;
11980 }
11981
11982 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VSHR/VSHL to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right shift,
11986 because the two have different limitations. */
11987
11988 int
11989 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11990 rtx *modconst, int *elementwidth,
11991 bool isleftshift)
11992 {
11993 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11994 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11995 unsigned HOST_WIDE_INT last_elt = 0;
11996 unsigned HOST_WIDE_INT maxshift;
11997
11998 /* All elements of the vector must hold the same shift amount. */
11999 for (i = 0; i < n_elts; i++)
12000 {
12001 rtx el = CONST_VECTOR_ELT (op, i);
12002 unsigned HOST_WIDE_INT elpart;
12003
12004 if (CONST_INT_P (el))
12005 elpart = INTVAL (el);
12006 else if (CONST_DOUBLE_P (el))
12007 return 0;
12008 else
12009 gcc_unreachable ();
12010
12011 if (i != 0 && elpart != last_elt)
12012 return 0;
12013
12014 last_elt = elpart;
12015 }
12016
12017 /* Shift less than element size. */
12018 maxshift = innersize * 8;
12019
12020 if (isleftshift)
12021 {
12022 /* Left shift immediate value can be from 0 to <size>-1. */
12023 if (last_elt >= maxshift)
12024 return 0;
12025 }
12026 else
12027 {
12028 /* Right shift immediate value can be from 1 to <size>. */
12029 if (last_elt == 0 || last_elt > maxshift)
12030 return 0;
12031 }
12032
12033 if (elementwidth)
12034 *elementwidth = innersize * 8;
12035
12036 if (modconst)
12037 *modconst = CONST_VECTOR_ELT (op, 0);
12038
12039 return 1;
12040 }
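/* For example, a V4SImode vector whose elements are all (const_int 3) is
   a valid immediate both for right shifts (1 <= 3 <= 32) and for left
   shifts (0 <= 3 <= 31); *ELEMENTWIDTH is set to 32 and *MODCONST to
   (const_int 3). A vector mixing different shift counts is rejected. */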
12041
12042 /* Return a string suitable for output of Neon immediate logic operation
12043 MNEM. */
12044
12045 char *
12046 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12047 int inverse, int quad)
12048 {
12049 int width, is_valid;
12050 static char templ[40];
12051
12052 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12053
12054 gcc_assert (is_valid != 0);
12055
12056 if (quad)
12057 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12058 else
12059 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12060
12061 return templ;
12062 }
12063
12064 /* Return a string suitable for output of Neon immediate shift operation
12065 (VSHR or VSHL) MNEM. */
12066
12067 char *
12068 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12069 machine_mode mode, int quad,
12070 bool isleftshift)
12071 {
12072 int width, is_valid;
12073 static char templ[40];
12074
12075 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12076 gcc_assert (is_valid != 0);
12077
12078 if (quad)
12079 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12080 else
12081 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12082
12083 return templ;
12084 }
12085
12086 /* Output a sequence of pairwise operations to implement a reduction.
12087 NOTE: We do "too much work" here, because pairwise operations work on two
12088 registers-worth of operands in one go. Unfortunately we do not think those
12089 extra calculations can be exploited to do the full operation in fewer steps.
12090 Although all vector elements of the result but the first are ignored, we
12091 actually calculate the same result in each of the elements. An alternative
12092 such as initially loading a vector with zero to use as each of the second
12093 operands would use up an additional register and take an extra instruction,
12094 for no particular gain. */
12095
12096 void
12097 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12098 rtx (*reduc) (rtx, rtx, rtx))
12099 {
12100 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12101 rtx tmpsum = op1;
12102
12103 for (i = parts / 2; i >= 1; i /= 2)
12104 {
12105 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12106 emit_insn (reduc (dest, tmpsum, tmpsum));
12107 tmpsum = dest;
12108 }
12109 }
12110
12111 /* If VALS is a vector constant that can be loaded into a register
12112 using VDUP, generate instructions to do so and return an RTX to
12113 assign to the register. Otherwise return NULL_RTX. */
12114
12115 static rtx
12116 neon_vdup_constant (rtx vals)
12117 {
12118 machine_mode mode = GET_MODE (vals);
12119 machine_mode inner_mode = GET_MODE_INNER (mode);
12120 rtx x;
12121
12122 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12123 return NULL_RTX;
12124
12125 if (!const_vec_duplicate_p (vals, &x))
12126 /* The elements are not all the same. We could handle repeating
12127 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12128 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12129 vdup.i16). */
12130 return NULL_RTX;
12131
12132 /* We can load this constant by using VDUP and a constant in a
12133 single ARM register. This will be cheaper than a vector
12134 load. */
12135
12136 x = copy_to_mode_reg (inner_mode, x);
12137 return gen_rtx_VEC_DUPLICATE (mode, x);
12138 }
12139
12140 /* Generate code to load VALS, which is a PARALLEL containing only
12141 constants (for vec_init) or CONST_VECTOR, efficiently into a
12142 register. Returns an RTX to copy into the register, or NULL_RTX
12143 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12144
12145 rtx
12146 neon_make_constant (rtx vals)
12147 {
12148 machine_mode mode = GET_MODE (vals);
12149 rtx target;
12150 rtx const_vec = NULL_RTX;
12151 int n_elts = GET_MODE_NUNITS (mode);
12152 int n_const = 0;
12153 int i;
12154
12155 if (GET_CODE (vals) == CONST_VECTOR)
12156 const_vec = vals;
12157 else if (GET_CODE (vals) == PARALLEL)
12158 {
12159 /* A CONST_VECTOR must contain only CONST_INTs and
12160 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12161 Only store valid constants in a CONST_VECTOR. */
12162 for (i = 0; i < n_elts; ++i)
12163 {
12164 rtx x = XVECEXP (vals, 0, i);
12165 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12166 n_const++;
12167 }
12168 if (n_const == n_elts)
12169 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12170 }
12171 else
12172 gcc_unreachable ();
12173
12174 if (const_vec != NULL
12175 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12176 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12177 return const_vec;
12178 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12179 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12180 pipeline cycle; creating the constant takes one or two ARM
12181 pipeline cycles. */
12182 return target;
12183 else if (const_vec != NULL_RTX)
12184 /* Load from constant pool. On Cortex-A8 this takes two cycles
12185 (for either double or quad vectors). We can not take advantage
12186 of single-cycle VLD1 because we need a PC-relative addressing
12187 mode. */
12188 return const_vec;
12189 else
12190 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12191 We can not construct an initializer. */
12192 return NULL_RTX;
12193 }
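/* For instance (values chosen for illustration): a V4SImode vector of
   four 1s is a valid VMOV immediate and is returned unchanged; four
   copies of 0x12345678 are not, so the value is loaded with VDUP from a
   core register; { 1, 2, 3, 4 } has no cheaper form and falls back to a
   constant-pool load. */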
12194
12195 /* Initialize vector TARGET to VALS. */
12196
12197 void
12198 neon_expand_vector_init (rtx target, rtx vals)
12199 {
12200 machine_mode mode = GET_MODE (target);
12201 machine_mode inner_mode = GET_MODE_INNER (mode);
12202 int n_elts = GET_MODE_NUNITS (mode);
12203 int n_var = 0, one_var = -1;
12204 bool all_same = true;
12205 rtx x, mem;
12206 int i;
12207
12208 for (i = 0; i < n_elts; ++i)
12209 {
12210 x = XVECEXP (vals, 0, i);
12211 if (!CONSTANT_P (x))
12212 ++n_var, one_var = i;
12213
12214 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12215 all_same = false;
12216 }
12217
12218 if (n_var == 0)
12219 {
12220 rtx constant = neon_make_constant (vals);
12221 if (constant != NULL_RTX)
12222 {
12223 emit_move_insn (target, constant);
12224 return;
12225 }
12226 }
12227
12228 /* Splat a single non-constant element if we can. */
12229 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12230 {
12231 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12232 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12233 return;
12234 }
12235
12236 /* One field is non-constant. Load constant then overwrite varying
12237 field. This is more efficient than using the stack. */
12238 if (n_var == 1)
12239 {
12240 rtx copy = copy_rtx (vals);
12241 rtx index = GEN_INT (one_var);
12242
12243 /* Load constant part of vector, substitute neighboring value for
12244 varying element. */
12245 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12246 neon_expand_vector_init (target, copy);
12247
12248 /* Insert variable. */
12249 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12250 switch (mode)
12251 {
12252 case E_V8QImode:
12253 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12254 break;
12255 case E_V16QImode:
12256 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12257 break;
12258 case E_V4HImode:
12259 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12260 break;
12261 case E_V8HImode:
12262 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12263 break;
12264 case E_V2SImode:
12265 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12266 break;
12267 case E_V4SImode:
12268 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12269 break;
12270 case E_V2SFmode:
12271 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12272 break;
12273 case E_V4SFmode:
12274 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12275 break;
12276 case E_V2DImode:
12277 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12278 break;
12279 default:
12280 gcc_unreachable ();
12281 }
12282 return;
12283 }
12284
12285 /* Construct the vector in memory one field at a time
12286 and load the whole vector. */
12287 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12288 for (i = 0; i < n_elts; i++)
12289 emit_move_insn (adjust_address_nv (mem, inner_mode,
12290 i * GET_MODE_SIZE (inner_mode)),
12291 XVECEXP (vals, 0, i));
12292 emit_move_insn (target, mem);
12293 }
12294
12295 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise an
12296 error, described by DESC, if it doesn't. EXP indicates the source location, which includes the
12297 inlining history for intrinsics. */
12298
12299 static void
12300 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12301 const_tree exp, const char *desc)
12302 {
12303 HOST_WIDE_INT lane;
12304
12305 gcc_assert (CONST_INT_P (operand));
12306
12307 lane = INTVAL (operand);
12308
12309 if (lane < low || lane >= high)
12310 {
12311 if (exp)
12312 error ("%K%s %wd out of range %wd - %wd",
12313 exp, desc, lane, low, high - 1);
12314 else
12315 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12316 }
12317 }
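/* For example, bounds_check (GEN_INT (4), 0, 4, exp, "lane") reports
   "lane 4 out of range 0 - 3", since HIGH is exclusive. */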
12318
12319 /* Bounds-check lanes. */
12320
12321 void
12322 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12323 const_tree exp)
12324 {
12325 bounds_check (operand, low, high, exp, "lane");
12326 }
12327
12328 /* Bounds-check constants. */
12329
12330 void
12331 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12332 {
12333 bounds_check (operand, low, high, NULL_TREE, "constant");
12334 }
12335
12336 HOST_WIDE_INT
12337 neon_element_bits (machine_mode mode)
12338 {
12339 return GET_MODE_UNIT_BITSIZE (mode);
12340 }
12341
12342 \f
12343 /* Predicates for `match_operand' and `match_operator'. */
12344
12345 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12346 WB is true if full writeback address modes are allowed and is false
12347 if limited writeback address modes (POST_INC and PRE_DEC) are
12348 allowed. */
12349
12350 int
12351 arm_coproc_mem_operand (rtx op, bool wb)
12352 {
12353 rtx ind;
12354
12355 /* Reject eliminable registers. */
12356 if (! (reload_in_progress || reload_completed || lra_in_progress)
12357 && ( reg_mentioned_p (frame_pointer_rtx, op)
12358 || reg_mentioned_p (arg_pointer_rtx, op)
12359 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12360 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12361 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12362 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12363 return FALSE;
12364
12365 /* Constants are converted into offsets from labels. */
12366 if (!MEM_P (op))
12367 return FALSE;
12368
12369 ind = XEXP (op, 0);
12370
12371 if (reload_completed
12372 && (GET_CODE (ind) == LABEL_REF
12373 || (GET_CODE (ind) == CONST
12374 && GET_CODE (XEXP (ind, 0)) == PLUS
12375 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12376 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12377 return TRUE;
12378
12379 /* Match: (mem (reg)). */
12380 if (REG_P (ind))
12381 return arm_address_register_rtx_p (ind, 0);
12382
12383 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12384 acceptable in any case (subject to verification by
12385 arm_address_register_rtx_p). We need WB to be true to accept
12386 PRE_INC and POST_DEC. */
12387 if (GET_CODE (ind) == POST_INC
12388 || GET_CODE (ind) == PRE_DEC
12389 || (wb
12390 && (GET_CODE (ind) == PRE_INC
12391 || GET_CODE (ind) == POST_DEC)))
12392 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12393
12394 if (wb
12395 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12396 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12397 && GET_CODE (XEXP (ind, 1)) == PLUS
12398 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12399 ind = XEXP (ind, 1);
12400
12401 /* Match:
12402 (plus (reg)
12403 (const)). */
12404 if (GET_CODE (ind) == PLUS
12405 && REG_P (XEXP (ind, 0))
12406 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12407 && CONST_INT_P (XEXP (ind, 1))
12408 && INTVAL (XEXP (ind, 1)) > -1024
12409 && INTVAL (XEXP (ind, 1)) < 1024
12410 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12411 return TRUE;
12412
12413 return FALSE;
12414 }
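/* Accepted forms include, for example, (mem (reg)), (mem (post_inc (reg)))
   and (mem (plus (reg) (const_int 8))); constant offsets must be
   word-aligned and lie strictly between -1024 and 1024, so an offset of
   1024 is rejected. PRE_INC and POST_DEC additionally require WB. */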
12415
12416 /* Return TRUE if OP is a memory operand which we can load or store a vector
12417 to/from. TYPE is one of the following values:
12418 0 - Vector load/store (vldr)
12419 1 - Core registers (ldm)
12420 2 - Element/structure loads (vld1)
12421 */
12422 int
12423 neon_vector_mem_operand (rtx op, int type, bool strict)
12424 {
12425 rtx ind;
12426
12427 /* Reject eliminable registers. */
12428 if (strict && ! (reload_in_progress || reload_completed)
12429 && (reg_mentioned_p (frame_pointer_rtx, op)
12430 || reg_mentioned_p (arg_pointer_rtx, op)
12431 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12432 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12433 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12434 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12435 return FALSE;
12436
12437 /* Constants are converted into offsets from labels. */
12438 if (!MEM_P (op))
12439 return FALSE;
12440
12441 ind = XEXP (op, 0);
12442
12443 if (reload_completed
12444 && (GET_CODE (ind) == LABEL_REF
12445 || (GET_CODE (ind) == CONST
12446 && GET_CODE (XEXP (ind, 0)) == PLUS
12447 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12448 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12449 return TRUE;
12450
12451 /* Match: (mem (reg)). */
12452 if (REG_P (ind))
12453 return arm_address_register_rtx_p (ind, 0);
12454
12455 /* Allow post-increment with Neon registers. */
12456 if ((type != 1 && GET_CODE (ind) == POST_INC)
12457 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12458 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12459
12460 /* Allow post-increment by register for VLDn. */
12461 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12462 && GET_CODE (XEXP (ind, 1)) == PLUS
12463 && REG_P (XEXP (XEXP (ind, 1), 1)))
12464 return true;
12465
12466 /* Match:
12467 (plus (reg)
12468 (const)). */
12469 if (type == 0
12470 && GET_CODE (ind) == PLUS
12471 && REG_P (XEXP (ind, 0))
12472 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12473 && CONST_INT_P (XEXP (ind, 1))
12474 && INTVAL (XEXP (ind, 1)) > -1024
12475 /* For quad modes, we restrict the constant offset to be slightly less
12476 than what the instruction format permits. We have no such constraint
12477 on double mode offsets. (This must match arm_legitimate_index_p.) */
12478 && (INTVAL (XEXP (ind, 1))
12479 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12480 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12481 return TRUE;
12482
12483 return FALSE;
12484 }
12485
12486 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12487 type. */
12488 int
12489 neon_struct_mem_operand (rtx op)
12490 {
12491 rtx ind;
12492
12493 /* Reject eliminable registers. */
12494 if (! (reload_in_progress || reload_completed)
12495 && ( reg_mentioned_p (frame_pointer_rtx, op)
12496 || reg_mentioned_p (arg_pointer_rtx, op)
12497 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12498 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12499 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12500 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12501 return FALSE;
12502
12503 /* Constants are converted into offsets from labels. */
12504 if (!MEM_P (op))
12505 return FALSE;
12506
12507 ind = XEXP (op, 0);
12508
12509 if (reload_completed
12510 && (GET_CODE (ind) == LABEL_REF
12511 || (GET_CODE (ind) == CONST
12512 && GET_CODE (XEXP (ind, 0)) == PLUS
12513 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12514 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12515 return TRUE;
12516
12517 /* Match: (mem (reg)). */
12518 if (REG_P (ind))
12519 return arm_address_register_rtx_p (ind, 0);
12520
12521 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12522 if (GET_CODE (ind) == POST_INC
12523 || GET_CODE (ind) == PRE_DEC)
12524 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12525
12526 return FALSE;
12527 }
12528
12529 /* Return true if X is a register that will be eliminated later on. */
12530 int
12531 arm_eliminable_register (rtx x)
12532 {
12533 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12534 || REGNO (x) == ARG_POINTER_REGNUM
12535 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12536 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12537 }
12538
12539 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12540 coprocessor registers. Otherwise return NO_REGS. */
12541
12542 enum reg_class
12543 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12544 {
12545 if (mode == HFmode)
12546 {
12547 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12548 return GENERAL_REGS;
12549 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12550 return NO_REGS;
12551 return GENERAL_REGS;
12552 }
12553
12554 /* The neon move patterns handle all legitimate vector and struct
12555 addresses. */
12556 if (TARGET_NEON
12557 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12558 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12559 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12560 || VALID_NEON_STRUCT_MODE (mode)))
12561 return NO_REGS;
12562
12563 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12564 return NO_REGS;
12565
12566 return GENERAL_REGS;
12567 }
12568
12569 /* Values which must be returned in the most-significant end of the return
12570 register. */
12571
12572 static bool
12573 arm_return_in_msb (const_tree valtype)
12574 {
12575 return (TARGET_AAPCS_BASED
12576 && BYTES_BIG_ENDIAN
12577 && (AGGREGATE_TYPE_P (valtype)
12578 || TREE_CODE (valtype) == COMPLEX_TYPE
12579 || FIXED_POINT_TYPE_P (valtype)));
12580 }
12581
12582 /* Return TRUE if X references a SYMBOL_REF. */
12583 int
12584 symbol_mentioned_p (rtx x)
12585 {
12586 const char * fmt;
12587 int i;
12588
12589 if (GET_CODE (x) == SYMBOL_REF)
12590 return 1;
12591
12592 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12593 are constant offsets, not symbols. */
12594 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12595 return 0;
12596
12597 fmt = GET_RTX_FORMAT (GET_CODE (x));
12598
12599 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12600 {
12601 if (fmt[i] == 'E')
12602 {
12603 int j;
12604
12605 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12606 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12607 return 1;
12608 }
12609 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12610 return 1;
12611 }
12612
12613 return 0;
12614 }
12615
12616 /* Return TRUE if X references a LABEL_REF. */
12617 int
12618 label_mentioned_p (rtx x)
12619 {
12620 const char * fmt;
12621 int i;
12622
12623 if (GET_CODE (x) == LABEL_REF)
12624 return 1;
12625
12626 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12627 instruction, but they are constant offsets, not symbols. */
12628 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12629 return 0;
12630
12631 fmt = GET_RTX_FORMAT (GET_CODE (x));
12632 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12633 {
12634 if (fmt[i] == 'E')
12635 {
12636 int j;
12637
12638 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12639 if (label_mentioned_p (XVECEXP (x, i, j)))
12640 return 1;
12641 }
12642 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12643 return 1;
12644 }
12645
12646 return 0;
12647 }
12648
12649 int
12650 tls_mentioned_p (rtx x)
12651 {
12652 switch (GET_CODE (x))
12653 {
12654 case CONST:
12655 return tls_mentioned_p (XEXP (x, 0));
12656
12657 case UNSPEC:
12658 if (XINT (x, 1) == UNSPEC_TLS)
12659 return 1;
12660
12661 /* Fall through. */
12662 default:
12663 return 0;
12664 }
12665 }
12666
12667 /* Must not copy any rtx that uses a pc-relative address.
12668 Also, disallow copying of load-exclusive instructions that
12669 may appear after splitting of compare-and-swap-style operations
12670 so as to prevent those loops from being transformed away from their
12671 canonical forms (see PR 69904). */
12672
12673 static bool
12674 arm_cannot_copy_insn_p (rtx_insn *insn)
12675 {
12676 /* The tls call insn cannot be copied, as it is paired with a data
12677 word. */
12678 if (recog_memoized (insn) == CODE_FOR_tlscall)
12679 return true;
12680
12681 subrtx_iterator::array_type array;
12682 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12683 {
12684 const_rtx x = *iter;
12685 if (GET_CODE (x) == UNSPEC
12686 && (XINT (x, 1) == UNSPEC_PIC_BASE
12687 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12688 return true;
12689 }
12690
12691 rtx set = single_set (insn);
12692 if (set)
12693 {
12694 rtx src = SET_SRC (set);
12695 if (GET_CODE (src) == ZERO_EXTEND)
12696 src = XEXP (src, 0);
12697
12698 /* Catch the load-exclusive and load-acquire operations. */
12699 if (GET_CODE (src) == UNSPEC_VOLATILE
12700 && (XINT (src, 1) == VUNSPEC_LL
12701 || XINT (src, 1) == VUNSPEC_LAX))
12702 return true;
12703 }
12704 return false;
12705 }
12706
12707 enum rtx_code
12708 minmax_code (rtx x)
12709 {
12710 enum rtx_code code = GET_CODE (x);
12711
12712 switch (code)
12713 {
12714 case SMAX:
12715 return GE;
12716 case SMIN:
12717 return LE;
12718 case UMIN:
12719 return LEU;
12720 case UMAX:
12721 return GEU;
12722 default:
12723 gcc_unreachable ();
12724 }
12725 }
12726
12727 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12728
12729 bool
12730 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12731 int *mask, bool *signed_sat)
12732 {
12733 /* The high bound must be a power of two minus one. */
12734 int log = exact_log2 (INTVAL (hi_bound) + 1);
12735 if (log == -1)
12736 return false;
12737
12738 /* The low bound is either zero (for usat) or one less than the
12739 negation of the high bound (for ssat). */
12740 if (INTVAL (lo_bound) == 0)
12741 {
12742 if (mask)
12743 *mask = log;
12744 if (signed_sat)
12745 *signed_sat = false;
12746
12747 return true;
12748 }
12749
12750 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12751 {
12752 if (mask)
12753 *mask = log + 1;
12754 if (signed_sat)
12755 *signed_sat = true;
12756
12757 return true;
12758 }
12759
12760 return false;
12761 }
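
/* Worked example (illustrative only): for HI_BOUND == 255 and LO_BOUND == 0,
   exact_log2 (256) == 8, so *MASK is set to 8 and *SIGNED_SAT to false,
   corresponding to USAT #8.  For HI_BOUND == 127 and LO_BOUND == -128 the log
   is 7, *MASK becomes 8 and *SIGNED_SAT true, corresponding to SSAT #8.  */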
12762
12763 /* Return 1 if memory locations are adjacent. */
12764 int
12765 adjacent_mem_locations (rtx a, rtx b)
12766 {
12767 /* We don't guarantee to preserve the order of these memory refs. */
12768 if (volatile_refs_p (a) || volatile_refs_p (b))
12769 return 0;
12770
12771 if ((REG_P (XEXP (a, 0))
12772 || (GET_CODE (XEXP (a, 0)) == PLUS
12773 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12774 && (REG_P (XEXP (b, 0))
12775 || (GET_CODE (XEXP (b, 0)) == PLUS
12776 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12777 {
12778 HOST_WIDE_INT val0 = 0, val1 = 0;
12779 rtx reg0, reg1;
12780 int val_diff;
12781
12782 if (GET_CODE (XEXP (a, 0)) == PLUS)
12783 {
12784 reg0 = XEXP (XEXP (a, 0), 0);
12785 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12786 }
12787 else
12788 reg0 = XEXP (a, 0);
12789
12790 if (GET_CODE (XEXP (b, 0)) == PLUS)
12791 {
12792 reg1 = XEXP (XEXP (b, 0), 0);
12793 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12794 }
12795 else
12796 reg1 = XEXP (b, 0);
12797
12798 /* Don't accept any offset that will require multiple
12799 instructions to handle, since this would cause the
12800 arith_adjacentmem pattern to output an overlong sequence. */
12801 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12802 return 0;
12803
12804 /* Don't allow an eliminable register: register elimination can make
12805 the offset too large. */
12806 if (arm_eliminable_register (reg0))
12807 return 0;
12808
12809 val_diff = val1 - val0;
12810
12811 if (arm_ld_sched)
12812 {
12813 /* If the target has load delay slots, then there's no benefit
12814 to using an ldm instruction unless the offset is zero and
12815 we are optimizing for size. */
12816 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12817 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12818 && (val_diff == 4 || val_diff == -4));
12819 }
12820
12821 return ((REGNO (reg0) == REGNO (reg1))
12822 && (val_diff == 4 || val_diff == -4));
12823 }
12824
12825 return 0;
12826 }
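
/* For illustration only: (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) count as adjacent here (same base
   register, offsets differing by exactly 4), whereas the same pair using
   different base registers, or with offsets 8 apart, does not.  */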
12827
12828 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12829 for load operations, false for store operations. CONSECUTIVE is true
12830 if the register numbers in the operation must be consecutive in the register
12831 bank. RETURN_PC is true if the value is to be loaded into the PC.
12832 The pattern we are trying to match for load is:
12833 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12834 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12835 :
12836 :
12837 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12838 ]
12839 where
12840 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12841 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12842 3. If consecutive is TRUE, then for kth register being loaded,
12843 REGNO (R_dk) = REGNO (R_d0) + k.
12844 The pattern for store is similar. */
12845 bool
12846 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12847 bool consecutive, bool return_pc)
12848 {
12849 HOST_WIDE_INT count = XVECLEN (op, 0);
12850 rtx reg, mem, addr;
12851 unsigned regno;
12852 unsigned first_regno;
12853 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12854 rtx elt;
12855 bool addr_reg_in_reglist = false;
12856 bool update = false;
12857 int reg_increment;
12858 int offset_adj;
12859 int regs_per_val;
12860
12861 /* If not in SImode, then registers must be consecutive
12862 (e.g., VLDM instructions for DFmode). */
12863 gcc_assert ((mode == SImode) || consecutive);
12864 /* Setting return_pc for stores is illegal. */
12865 gcc_assert (!return_pc || load);
12866
12867 /* Set up the increments and the regs per val based on the mode. */
12868 reg_increment = GET_MODE_SIZE (mode);
12869 regs_per_val = reg_increment / 4;
12870 offset_adj = return_pc ? 1 : 0;
12871
12872 if (count <= 1
12873 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12874 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12875 return false;
12876
12877 /* Check if this is a write-back. */
12878 elt = XVECEXP (op, 0, offset_adj);
12879 if (GET_CODE (SET_SRC (elt)) == PLUS)
12880 {
12881 i++;
12882 base = 1;
12883 update = true;
12884
12885 /* The offset adjustment must be the number of registers being
12886 popped times the size of a single register. */
12887 if (!REG_P (SET_DEST (elt))
12888 || !REG_P (XEXP (SET_SRC (elt), 0))
12889 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12890 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12891 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12892 ((count - 1 - offset_adj) * reg_increment))
12893 return false;
12894 }
12895
12896 i = i + offset_adj;
12897 base = base + offset_adj;
12898 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12899 success depends on the type: VLDM can do just one reg,
12900 LDM must do at least two. */
12901 if ((count <= i) && (mode == SImode))
12902 return false;
12903
12904 elt = XVECEXP (op, 0, i - 1);
12905 if (GET_CODE (elt) != SET)
12906 return false;
12907
12908 if (load)
12909 {
12910 reg = SET_DEST (elt);
12911 mem = SET_SRC (elt);
12912 }
12913 else
12914 {
12915 reg = SET_SRC (elt);
12916 mem = SET_DEST (elt);
12917 }
12918
12919 if (!REG_P (reg) || !MEM_P (mem))
12920 return false;
12921
12922 regno = REGNO (reg);
12923 first_regno = regno;
12924 addr = XEXP (mem, 0);
12925 if (GET_CODE (addr) == PLUS)
12926 {
12927 if (!CONST_INT_P (XEXP (addr, 1)))
12928 return false;
12929
12930 offset = INTVAL (XEXP (addr, 1));
12931 addr = XEXP (addr, 0);
12932 }
12933
12934 if (!REG_P (addr))
12935 return false;
12936
12937 /* Don't allow SP to be loaded unless it is also the base register. It
12938 guarantees that SP is reset correctly when an LDM instruction
12939 is interrupted. Otherwise, we might end up with a corrupt stack. */
12940 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12941 return false;
12942
12943 for (; i < count; i++)
12944 {
12945 elt = XVECEXP (op, 0, i);
12946 if (GET_CODE (elt) != SET)
12947 return false;
12948
12949 if (load)
12950 {
12951 reg = SET_DEST (elt);
12952 mem = SET_SRC (elt);
12953 }
12954 else
12955 {
12956 reg = SET_SRC (elt);
12957 mem = SET_DEST (elt);
12958 }
12959
12960 if (!REG_P (reg)
12961 || GET_MODE (reg) != mode
12962 || REGNO (reg) <= regno
12963 || (consecutive
12964 && (REGNO (reg) !=
12965 (unsigned int) (first_regno + regs_per_val * (i - base))))
12966 /* Don't allow SP to be loaded unless it is also the base register. It
12967 guarantees that SP is reset correctly when an LDM instruction
12968 is interrupted. Otherwise, we might end up with a corrupt stack. */
12969 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12970 || !MEM_P (mem)
12971 || GET_MODE (mem) != mode
12972 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12973 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12974 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12975 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12976 offset + (i - base) * reg_increment))
12977 && (!REG_P (XEXP (mem, 0))
12978 || offset + (i - base) * reg_increment != 0)))
12979 return false;
12980
12981 regno = REGNO (reg);
12982 if (regno == REGNO (addr))
12983 addr_reg_in_reglist = true;
12984 }
12985
12986 if (load)
12987 {
12988 if (update && addr_reg_in_reglist)
12989 return false;
12990
12991 /* For Thumb-1, the address register is always modified, either by write-back
12992 or by an explicit load. If the pattern does not describe an update,
12993 then the address register must be in the list of loaded registers. */
12994 if (TARGET_THUMB1)
12995 return update || addr_reg_in_reglist;
12996 }
12997
12998 return true;
12999 }
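
/* Illustrative sketch only: a two-register LDMIA with write-back, such as
   "ldmia r0!, {r1, r2}", reaches the predicate above roughly as
     (parallel [(set (reg r0) (plus (reg r0) (const_int 8)))
                (set (reg r1) (mem (reg r0)))
                (set (reg r2) (mem (plus (reg r0) (const_int 4))))])
   i.e. the optional base-update element first, followed by the loads in
   ascending register and offset order.  */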
13000
13001 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13002 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13003 instruction. ADD_OFFSET is nonzero if the base address register needs
13004 to be modified with an add instruction before we can use it. */
13005
13006 static bool
13007 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13008 int nops, HOST_WIDE_INT add_offset)
13009 {
13010 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13011 if the offset isn't small enough. The reason 2 ldrs are faster
13012 is because these ARMs are able to do more than one cache access
13013 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13014 whilst the ARM8 has a double bandwidth cache. This means that
13015 these cores can do both an instruction fetch and a data fetch in
13016 a single cycle, so the trick of calculating the address into a
13017 scratch register (one of the result regs) and then doing a load
13018 multiple actually becomes slower (and no smaller in code size).
13019 That is the transformation
13020
13021 ldr rd1, [rbase + offset]
13022 ldr rd2, [rbase + offset + 4]
13023
13024 to
13025
13026 add rd1, rbase, offset
13027 ldmia rd1, {rd1, rd2}
13028
13029 produces worse code -- '3 cycles + any stalls on rd2' instead of
13030 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13031 access per cycle, the first sequence could never complete in less
13032 than 6 cycles, whereas the ldm sequence would only take 5 and
13033 would make better use of sequential accesses if not hitting the
13034 cache.
13035
13036 We cheat here and test 'arm_ld_sched' which we currently know to
13037 only be true for the ARM8, ARM9 and StrongARM. If this ever
13038 changes, then the test below needs to be reworked. */
13039 if (nops == 2 && arm_ld_sched && add_offset != 0)
13040 return false;
13041
13042 /* XScale has load-store double instructions, but they have stricter
13043 alignment requirements than load-store multiple, so we cannot
13044 use them.
13045
13046 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13047 the pipeline until completion.
13048
13049 NREGS CYCLES
13050 1 3
13051 2 4
13052 3 5
13053 4 6
13054
13055 An ldr instruction takes 1-3 cycles, but does not block the
13056 pipeline.
13057
13058 NREGS CYCLES
13059 1 1-3
13060 2 2-6
13061 3 3-9
13062 4 4-12
13063
13064 Best case ldr will always win. However, the more ldr instructions
13065 we issue, the less likely we are to be able to schedule them well.
13066 Using ldr instructions also increases code size.
13067
13068 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13069 for counts of 3 or 4 regs. */
13070 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13071 return false;
13072 return true;
13073 }
13074
13075 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13076 Given an array UNSORTED_OFFSETS containing NOPS offsets, fill in an array
13077 ORDER that describes the sequence in which to access the offsets so that
13078 they are visited in ascending order. In this sequence, each offset must
13079 be larger by exactly 4 than the previous one. ORDER[0] must have been
13080 filled in by the caller with the index of the lowest offset.
13081 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13082 we use to verify that ORDER produces an ascending order of registers.
13083 Return true if it was possible to construct such an order, false if
13084 not. */
13085
13086 static bool
13087 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13088 int *unsorted_regs)
13089 {
13090 int i;
13091 for (i = 1; i < nops; i++)
13092 {
13093 int j;
13094
13095 order[i] = order[i - 1];
13096 for (j = 0; j < nops; j++)
13097 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13098 {
13099 /* We must find exactly one offset that is higher than the
13100 previous one by 4. */
13101 if (order[i] != order[i - 1])
13102 return false;
13103 order[i] = j;
13104 }
13105 if (order[i] == order[i - 1])
13106 return false;
13107 /* The register numbers must be ascending. */
13108 if (unsorted_regs != NULL
13109 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13110 return false;
13111 }
13112 return true;
13113 }
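
/* Worked example (illustrative only): for UNSORTED_OFFSETS == {8, 0, 4} the
   caller sets ORDER[0] = 1 (the index of offset 0); the loop above then
   selects ORDER[1] = 2 (offset 4) and ORDER[2] = 0 (offset 8), yielding the
   ascending sequence 0, 4, 8.  */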
13114
13115 /* Used to determine in a peephole whether a sequence of load
13116 instructions can be changed into a load-multiple instruction.
13117 NOPS is the number of separate load instructions we are examining. The
13118 first NOPS entries in OPERANDS are the destination registers, the
13119 next NOPS entries are memory operands. If this function is
13120 successful, *BASE is set to the common base register of the memory
13121 accesses; *LOAD_OFFSET is set to the first memory location's offset
13122 from that base register.
13123 REGS is an array filled in with the destination register numbers.
13124 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13125 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13126 the sequence of registers in REGS matches the loads from ascending memory
13127 locations, and the function verifies that the register numbers are
13128 themselves ascending. If CHECK_REGS is false, the register numbers
13129 are stored in the order they are found in the operands. */
13130 static int
13131 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13132 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13133 {
13134 int unsorted_regs[MAX_LDM_STM_OPS];
13135 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13136 int order[MAX_LDM_STM_OPS];
13137 rtx base_reg_rtx = NULL;
13138 int base_reg = -1;
13139 int i, ldm_case;
13140
13141 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13142 easily extended if required. */
13143 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13144
13145 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13146
13147 /* Loop over the operands and check that the memory references are
13148 suitable (i.e. immediate offsets from the same base register). At
13149 the same time, extract the target register, and the memory
13150 offsets. */
13151 for (i = 0; i < nops; i++)
13152 {
13153 rtx reg;
13154 rtx offset;
13155
13156 /* Convert a subreg of a mem into the mem itself. */
13157 if (GET_CODE (operands[nops + i]) == SUBREG)
13158 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13159
13160 gcc_assert (MEM_P (operands[nops + i]));
13161
13162 /* Don't reorder volatile memory references; it doesn't seem worth
13163 looking for the case where the order is ok anyway. */
13164 if (MEM_VOLATILE_P (operands[nops + i]))
13165 return 0;
13166
13167 offset = const0_rtx;
13168
13169 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13170 || (GET_CODE (reg) == SUBREG
13171 && REG_P (reg = SUBREG_REG (reg))))
13172 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13173 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13174 || (GET_CODE (reg) == SUBREG
13175 && REG_P (reg = SUBREG_REG (reg))))
13176 && (CONST_INT_P (offset
13177 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13178 {
13179 if (i == 0)
13180 {
13181 base_reg = REGNO (reg);
13182 base_reg_rtx = reg;
13183 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13184 return 0;
13185 }
13186 else if (base_reg != (int) REGNO (reg))
13187 /* Not addressed from the same base register. */
13188 return 0;
13189
13190 unsorted_regs[i] = (REG_P (operands[i])
13191 ? REGNO (operands[i])
13192 : REGNO (SUBREG_REG (operands[i])));
13193
13194 /* If it isn't an integer register, or if it overwrites the
13195 base register but isn't the last insn in the list, then
13196 we can't do this. */
13197 if (unsorted_regs[i] < 0
13198 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13199 || unsorted_regs[i] > 14
13200 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13201 return 0;
13202
13203 /* Don't allow SP to be loaded unless it is also the base
13204 register. It guarantees that SP is reset correctly when
13205 an LDM instruction is interrupted. Otherwise, we might
13206 end up with a corrupt stack. */
13207 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13208 return 0;
13209
13210 unsorted_offsets[i] = INTVAL (offset);
13211 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13212 order[0] = i;
13213 }
13214 else
13215 /* Not a suitable memory address. */
13216 return 0;
13217 }
13218
13219 /* All the useful information has now been extracted from the
13220 operands into unsorted_regs and unsorted_offsets; additionally,
13221 order[0] has been set to the lowest offset in the list. Sort
13222 the offsets into order, verifying that they are adjacent, and
13223 check that the register numbers are ascending. */
13224 if (!compute_offset_order (nops, unsorted_offsets, order,
13225 check_regs ? unsorted_regs : NULL))
13226 return 0;
13227
13228 if (saved_order)
13229 memcpy (saved_order, order, sizeof order);
13230
13231 if (base)
13232 {
13233 *base = base_reg;
13234
13235 for (i = 0; i < nops; i++)
13236 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13237
13238 *load_offset = unsorted_offsets[order[0]];
13239 }
13240
13241 if (TARGET_THUMB1
13242 && !peep2_reg_dead_p (nops, base_reg_rtx))
13243 return 0;
13244
13245 if (unsorted_offsets[order[0]] == 0)
13246 ldm_case = 1; /* ldmia */
13247 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13248 ldm_case = 2; /* ldmib */
13249 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13250 ldm_case = 3; /* ldmda */
13251 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13252 ldm_case = 4; /* ldmdb */
13253 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13254 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13255 ldm_case = 5;
13256 else
13257 return 0;
13258
13259 if (!multiple_operation_profitable_p (false, nops,
13260 ldm_case == 5
13261 ? unsorted_offsets[order[0]] : 0))
13262 return 0;
13263
13264 return ldm_case;
13265 }
13266
13267 /* Used to determine in a peephole whether a sequence of store instructions can
13268 be changed into a store-multiple instruction.
13269 NOPS is the number of separate store instructions we are examining.
13270 NOPS_TOTAL is the total number of instructions recognized by the peephole
13271 pattern.
13272 The first NOPS entries in OPERANDS are the source registers, the next
13273 NOPS entries are memory operands. If this function is successful, *BASE is
13274 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13275 to the first memory location's offset from that base register. REGS is an
13276 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13277 likewise filled with the corresponding rtx's.
13278 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13279 numbers to an ascending order of stores.
13280 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13281 from ascending memory locations, and the function verifies that the register
13282 numbers are themselves ascending. If CHECK_REGS is false, the register
13283 numbers are stored in the order they are found in the operands. */
13284 static int
13285 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13286 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13287 HOST_WIDE_INT *load_offset, bool check_regs)
13288 {
13289 int unsorted_regs[MAX_LDM_STM_OPS];
13290 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13291 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13292 int order[MAX_LDM_STM_OPS];
13293 int base_reg = -1;
13294 rtx base_reg_rtx = NULL;
13295 int i, stm_case;
13296
13297 /* Write-back of the base register is currently only supported for Thumb-1. */
13298 int base_writeback = TARGET_THUMB1;
13299
13300 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13301 easily extended if required. */
13302 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13303
13304 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13305
13306 /* Loop over the operands and check that the memory references are
13307 suitable (i.e. immediate offsets from the same base register). At
13308 the same time, extract the target register, and the memory
13309 offsets. */
13310 for (i = 0; i < nops; i++)
13311 {
13312 rtx reg;
13313 rtx offset;
13314
13315 /* Convert a subreg of a mem into the mem itself. */
13316 if (GET_CODE (operands[nops + i]) == SUBREG)
13317 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13318
13319 gcc_assert (MEM_P (operands[nops + i]));
13320
13321 /* Don't reorder volatile memory references; it doesn't seem worth
13322 looking for the case where the order is ok anyway. */
13323 if (MEM_VOLATILE_P (operands[nops + i]))
13324 return 0;
13325
13326 offset = const0_rtx;
13327
13328 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13329 || (GET_CODE (reg) == SUBREG
13330 && REG_P (reg = SUBREG_REG (reg))))
13331 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13332 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13333 || (GET_CODE (reg) == SUBREG
13334 && REG_P (reg = SUBREG_REG (reg))))
13335 && (CONST_INT_P (offset
13336 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13337 {
13338 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13339 ? operands[i] : SUBREG_REG (operands[i]));
13340 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13341
13342 if (i == 0)
13343 {
13344 base_reg = REGNO (reg);
13345 base_reg_rtx = reg;
13346 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13347 return 0;
13348 }
13349 else if (base_reg != (int) REGNO (reg))
13350 /* Not addressed from the same base register. */
13351 return 0;
13352
13353 /* If it isn't an integer register, then we can't do this. */
13354 if (unsorted_regs[i] < 0
13355 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13356 /* The effects are unpredictable if the base register is
13357 both updated and stored. */
13358 || (base_writeback && unsorted_regs[i] == base_reg)
13359 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13360 || unsorted_regs[i] > 14)
13361 return 0;
13362
13363 unsorted_offsets[i] = INTVAL (offset);
13364 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13365 order[0] = i;
13366 }
13367 else
13368 /* Not a suitable memory address. */
13369 return 0;
13370 }
13371
13372 /* All the useful information has now been extracted from the
13373 operands into unsorted_regs and unsorted_offsets; additionally,
13374 order[0] has been set to the lowest offset in the list. Sort
13375 the offsets into order, verifying that they are adjacent, and
13376 check that the register numbers are ascending. */
13377 if (!compute_offset_order (nops, unsorted_offsets, order,
13378 check_regs ? unsorted_regs : NULL))
13379 return 0;
13380
13381 if (saved_order)
13382 memcpy (saved_order, order, sizeof order);
13383
13384 if (base)
13385 {
13386 *base = base_reg;
13387
13388 for (i = 0; i < nops; i++)
13389 {
13390 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13391 if (reg_rtxs)
13392 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13393 }
13394
13395 *load_offset = unsorted_offsets[order[0]];
13396 }
13397
13398 if (TARGET_THUMB1
13399 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13400 return 0;
13401
13402 if (unsorted_offsets[order[0]] == 0)
13403 stm_case = 1; /* stmia */
13404 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13405 stm_case = 2; /* stmib */
13406 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13407 stm_case = 3; /* stmda */
13408 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13409 stm_case = 4; /* stmdb */
13410 else
13411 return 0;
13412
13413 if (!multiple_operation_profitable_p (false, nops, 0))
13414 return 0;
13415
13416 return stm_case;
13417 }
13418 \f
13419 /* Routines for use in generating RTL. */
13420
13421 /* Generate a load-multiple instruction. COUNT is the number of loads in
13422 the instruction; REGS and MEMS are arrays containing the operands.
13423 BASEREG is the base register to be used in addressing the memory operands.
13424 WBACK_OFFSET is nonzero if the instruction should update the base
13425 register. */
13426
13427 static rtx
13428 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13429 HOST_WIDE_INT wback_offset)
13430 {
13431 int i = 0, j;
13432 rtx result;
13433
13434 if (!multiple_operation_profitable_p (false, count, 0))
13435 {
13436 rtx seq;
13437
13438 start_sequence ();
13439
13440 for (i = 0; i < count; i++)
13441 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13442
13443 if (wback_offset != 0)
13444 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13445
13446 seq = get_insns ();
13447 end_sequence ();
13448
13449 return seq;
13450 }
13451
13452 result = gen_rtx_PARALLEL (VOIDmode,
13453 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13454 if (wback_offset != 0)
13455 {
13456 XVECEXP (result, 0, 0)
13457 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13458 i = 1;
13459 count++;
13460 }
13461
13462 for (j = 0; i < count; i++, j++)
13463 XVECEXP (result, 0, i)
13464 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13465
13466 return result;
13467 }
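
/* Illustrative sketch only: for COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8 the parallel built above corresponds to
   "ldmia basereg!, {r4, r5}":
     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                (set (reg r4) (mem ...))
                (set (reg r5) (mem ...))])
   where the mems come from the caller-supplied MEMS array.  When the multiple
   operation is judged unprofitable, a plain sequence of single moves (plus an
   add for the write-back) is returned instead.  */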
13468
13469 /* Generate a store-multiple instruction. COUNT is the number of stores in
13470 the instruction; REGS and MEMS are arrays containing the operands.
13471 BASEREG is the base register to be used in addressing the memory operands.
13472 WBACK_OFFSET is nonzero if the instruction should update the base
13473 register. */
13474
13475 static rtx
13476 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13477 HOST_WIDE_INT wback_offset)
13478 {
13479 int i = 0, j;
13480 rtx result;
13481
13482 if (GET_CODE (basereg) == PLUS)
13483 basereg = XEXP (basereg, 0);
13484
13485 if (!multiple_operation_profitable_p (false, count, 0))
13486 {
13487 rtx seq;
13488
13489 start_sequence ();
13490
13491 for (i = 0; i < count; i++)
13492 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13493
13494 if (wback_offset != 0)
13495 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13496
13497 seq = get_insns ();
13498 end_sequence ();
13499
13500 return seq;
13501 }
13502
13503 result = gen_rtx_PARALLEL (VOIDmode,
13504 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13505 if (wback_offset != 0)
13506 {
13507 XVECEXP (result, 0, 0)
13508 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13509 i = 1;
13510 count++;
13511 }
13512
13513 for (j = 0; i < count; i++, j++)
13514 XVECEXP (result, 0, i)
13515 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13516
13517 return result;
13518 }
13519
13520 /* Generate either a load-multiple or a store-multiple instruction. This
13521 function can be used in situations where we can start with a single MEM
13522 rtx and adjust its address upwards.
13523 COUNT is the number of operations in the instruction, not counting a
13524 possible update of the base register. REGS is an array containing the
13525 register operands.
13526 BASEREG is the base register to be used in addressing the memory operands,
13527 which are constructed from BASEMEM.
13528 WRITE_BACK specifies whether the generated instruction should include an
13529 update of the base register.
13530 OFFSETP is used to pass an offset to and from this function; this offset
13531 is not used when constructing the address (instead BASEMEM should have an
13532 appropriate offset in its address), it is used only for setting
13533 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13534
13535 static rtx
13536 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13537 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13538 {
13539 rtx mems[MAX_LDM_STM_OPS];
13540 HOST_WIDE_INT offset = *offsetp;
13541 int i;
13542
13543 gcc_assert (count <= MAX_LDM_STM_OPS);
13544
13545 if (GET_CODE (basereg) == PLUS)
13546 basereg = XEXP (basereg, 0);
13547
13548 for (i = 0; i < count; i++)
13549 {
13550 rtx addr = plus_constant (Pmode, basereg, i * 4);
13551 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13552 offset += 4;
13553 }
13554
13555 if (write_back)
13556 *offsetp = offset;
13557
13558 if (is_load)
13559 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13560 write_back ? 4 * count : 0);
13561 else
13562 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13563 write_back ? 4 * count : 0);
13564 }
13565
13566 rtx
13567 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13568 rtx basemem, HOST_WIDE_INT *offsetp)
13569 {
13570 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13571 offsetp);
13572 }
13573
13574 rtx
13575 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13576 rtx basemem, HOST_WIDE_INT *offsetp)
13577 {
13578 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13579 offsetp);
13580 }
13581
13582 /* Called from a peephole2 expander to turn a sequence of loads into an
13583 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13584 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13585 is true if we can reorder the registers because they are used commutatively
13586 subsequently.
13587 Returns true iff we could generate a new instruction. */
13588
13589 bool
13590 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13591 {
13592 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13593 rtx mems[MAX_LDM_STM_OPS];
13594 int i, j, base_reg;
13595 rtx base_reg_rtx;
13596 HOST_WIDE_INT offset;
13597 int write_back = FALSE;
13598 int ldm_case;
13599 rtx addr;
13600
13601 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13602 &base_reg, &offset, !sort_regs);
13603
13604 if (ldm_case == 0)
13605 return false;
13606
13607 if (sort_regs)
13608 for (i = 0; i < nops - 1; i++)
13609 for (j = i + 1; j < nops; j++)
13610 if (regs[i] > regs[j])
13611 {
13612 int t = regs[i];
13613 regs[i] = regs[j];
13614 regs[j] = t;
13615 }
13616 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13617
13618 if (TARGET_THUMB1)
13619 {
13620 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13621 gcc_assert (ldm_case == 1 || ldm_case == 5);
13622 write_back = TRUE;
13623 }
13624
13625 if (ldm_case == 5)
13626 {
13627 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13628 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13629 offset = 0;
13630 if (!TARGET_THUMB1)
13631 base_reg_rtx = newbase;
13632 }
13633
13634 for (i = 0; i < nops; i++)
13635 {
13636 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13637 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13638 SImode, addr, 0);
13639 }
13640 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13641 write_back ? offset + i * 4 : 0));
13642 return true;
13643 }
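
/* Illustrative only: given the peephole-matched pair
     ldr r1, [r0]
     ldr r2, [r0, #4]
   load_multiple_sequence classifies it as ldm_case 1 and the routine above
   replaces it with "ldmia r0, {r1, r2}" (with write-back forced on for
   Thumb-1, where the base register must be dead afterwards).  */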
13644
13645 /* Called from a peephole2 expander to turn a sequence of stores into an
13646 STM instruction. OPERANDS are the operands found by the peephole matcher;
13647 NOPS indicates how many separate stores we are trying to combine.
13648 Returns true iff we could generate a new instruction. */
13649
13650 bool
13651 gen_stm_seq (rtx *operands, int nops)
13652 {
13653 int i;
13654 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13655 rtx mems[MAX_LDM_STM_OPS];
13656 int base_reg;
13657 rtx base_reg_rtx;
13658 HOST_WIDE_INT offset;
13659 int write_back = FALSE;
13660 int stm_case;
13661 rtx addr;
13662 bool base_reg_dies;
13663
13664 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13665 mem_order, &base_reg, &offset, true);
13666
13667 if (stm_case == 0)
13668 return false;
13669
13670 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13671
13672 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13673 if (TARGET_THUMB1)
13674 {
13675 gcc_assert (base_reg_dies);
13676 write_back = TRUE;
13677 }
13678
13679 if (stm_case == 5)
13680 {
13681 gcc_assert (base_reg_dies);
13682 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13683 offset = 0;
13684 }
13685
13686 addr = plus_constant (Pmode, base_reg_rtx, offset);
13687
13688 for (i = 0; i < nops; i++)
13689 {
13690 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13691 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13692 SImode, addr, 0);
13693 }
13694 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13695 write_back ? offset + i * 4 : 0));
13696 return true;
13697 }
13698
13699 /* Called from a peephole2 expander to turn a sequence of stores that are
13700 preceded by constant loads into an STM instruction. OPERANDS are the
13701 operands found by the peephole matcher; NOPS indicates how many
13702 separate stores we are trying to combine; there are 2 * NOPS
13703 instructions in the peephole.
13704 Returns true iff we could generate a new instruction. */
13705
13706 bool
13707 gen_const_stm_seq (rtx *operands, int nops)
13708 {
13709 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13710 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13711 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13712 rtx mems[MAX_LDM_STM_OPS];
13713 int base_reg;
13714 rtx base_reg_rtx;
13715 HOST_WIDE_INT offset;
13716 int write_back = FALSE;
13717 int stm_case;
13718 rtx addr;
13719 bool base_reg_dies;
13720 int i, j;
13721 HARD_REG_SET allocated;
13722
13723 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13724 mem_order, &base_reg, &offset, false);
13725
13726 if (stm_case == 0)
13727 return false;
13728
13729 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13730
13731 /* If the same register is used more than once, try to find a free
13732 register. */
13733 CLEAR_HARD_REG_SET (allocated);
13734 for (i = 0; i < nops; i++)
13735 {
13736 for (j = i + 1; j < nops; j++)
13737 if (regs[i] == regs[j])
13738 {
13739 rtx t = peep2_find_free_register (0, nops * 2,
13740 TARGET_THUMB1 ? "l" : "r",
13741 SImode, &allocated);
13742 if (t == NULL_RTX)
13743 return false;
13744 reg_rtxs[i] = t;
13745 regs[i] = REGNO (t);
13746 }
13747 }
13748
13749 /* Compute an ordering that maps the register numbers to an ascending
13750 sequence. */
13751 reg_order[0] = 0;
13752 for (i = 0; i < nops; i++)
13753 if (regs[i] < regs[reg_order[0]])
13754 reg_order[0] = i;
13755
13756 for (i = 1; i < nops; i++)
13757 {
13758 int this_order = reg_order[i - 1];
13759 for (j = 0; j < nops; j++)
13760 if (regs[j] > regs[reg_order[i - 1]]
13761 && (this_order == reg_order[i - 1]
13762 || regs[j] < regs[this_order]))
13763 this_order = j;
13764 reg_order[i] = this_order;
13765 }
13766
13767 /* Ensure that registers that must be live after the instruction end
13768 up with the correct value. */
13769 for (i = 0; i < nops; i++)
13770 {
13771 int this_order = reg_order[i];
13772 if ((this_order != mem_order[i]
13773 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13774 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13775 return false;
13776 }
13777
13778 /* Load the constants. */
13779 for (i = 0; i < nops; i++)
13780 {
13781 rtx op = operands[2 * nops + mem_order[i]];
13782 sorted_regs[i] = regs[reg_order[i]];
13783 emit_move_insn (reg_rtxs[reg_order[i]], op);
13784 }
13785
13786 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13787
13788 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13789 if (TARGET_THUMB1)
13790 {
13791 gcc_assert (base_reg_dies);
13792 write_back = TRUE;
13793 }
13794
13795 if (stm_case == 5)
13796 {
13797 gcc_assert (base_reg_dies);
13798 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13799 offset = 0;
13800 }
13801
13802 addr = plus_constant (Pmode, base_reg_rtx, offset);
13803
13804 for (i = 0; i < nops; i++)
13805 {
13806 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13807 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13808 SImode, addr, 0);
13809 }
13810 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13811 write_back ? offset + i * 4 : 0));
13812 return true;
13813 }
13814
13815 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13816 unaligned copies on processors which support unaligned semantics for those
13817 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13818 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13819 An interleave factor of 1 (the minimum) will perform no interleaving.
13820 Load/store multiple are used for aligned addresses where possible. */
13821
13822 static void
13823 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13824 HOST_WIDE_INT length,
13825 unsigned int interleave_factor)
13826 {
13827 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13828 int *regnos = XALLOCAVEC (int, interleave_factor);
13829 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13830 HOST_WIDE_INT i, j;
13831 HOST_WIDE_INT remaining = length, words;
13832 rtx halfword_tmp = NULL, byte_tmp = NULL;
13833 rtx dst, src;
13834 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13835 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13836 HOST_WIDE_INT srcoffset, dstoffset;
13837 HOST_WIDE_INT src_autoinc, dst_autoinc;
13838 rtx mem, addr;
13839
13840 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13841
13842 /* Use hard registers if we have aligned source or destination so we can use
13843 load/store multiple with contiguous registers. */
13844 if (dst_aligned || src_aligned)
13845 for (i = 0; i < interleave_factor; i++)
13846 regs[i] = gen_rtx_REG (SImode, i);
13847 else
13848 for (i = 0; i < interleave_factor; i++)
13849 regs[i] = gen_reg_rtx (SImode);
13850
13851 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13852 src = copy_addr_to_reg (XEXP (srcbase, 0));
13853
13854 srcoffset = dstoffset = 0;
13855
13856 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13857 For copying the last bytes we want to subtract this offset again. */
13858 src_autoinc = dst_autoinc = 0;
13859
13860 for (i = 0; i < interleave_factor; i++)
13861 regnos[i] = i;
13862
13863 /* Copy BLOCK_SIZE_BYTES chunks. */
13864
13865 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13866 {
13867 /* Load words. */
13868 if (src_aligned && interleave_factor > 1)
13869 {
13870 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13871 TRUE, srcbase, &srcoffset));
13872 src_autoinc += UNITS_PER_WORD * interleave_factor;
13873 }
13874 else
13875 {
13876 for (j = 0; j < interleave_factor; j++)
13877 {
13878 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13879 - src_autoinc));
13880 mem = adjust_automodify_address (srcbase, SImode, addr,
13881 srcoffset + j * UNITS_PER_WORD);
13882 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13883 }
13884 srcoffset += block_size_bytes;
13885 }
13886
13887 /* Store words. */
13888 if (dst_aligned && interleave_factor > 1)
13889 {
13890 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13891 TRUE, dstbase, &dstoffset));
13892 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13893 }
13894 else
13895 {
13896 for (j = 0; j < interleave_factor; j++)
13897 {
13898 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13899 - dst_autoinc));
13900 mem = adjust_automodify_address (dstbase, SImode, addr,
13901 dstoffset + j * UNITS_PER_WORD);
13902 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13903 }
13904 dstoffset += block_size_bytes;
13905 }
13906
13907 remaining -= block_size_bytes;
13908 }
13909
13910 /* Copy any whole words left (note these aren't interleaved with any
13911 subsequent halfword/byte load/stores in the interests of simplicity). */
13912
13913 words = remaining / UNITS_PER_WORD;
13914
13915 gcc_assert (words < interleave_factor);
13916
13917 if (src_aligned && words > 1)
13918 {
13919 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13920 &srcoffset));
13921 src_autoinc += UNITS_PER_WORD * words;
13922 }
13923 else
13924 {
13925 for (j = 0; j < words; j++)
13926 {
13927 addr = plus_constant (Pmode, src,
13928 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13929 mem = adjust_automodify_address (srcbase, SImode, addr,
13930 srcoffset + j * UNITS_PER_WORD);
13931 if (src_aligned)
13932 emit_move_insn (regs[j], mem);
13933 else
13934 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13935 }
13936 srcoffset += words * UNITS_PER_WORD;
13937 }
13938
13939 if (dst_aligned && words > 1)
13940 {
13941 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13942 &dstoffset));
13943 dst_autoinc += words * UNITS_PER_WORD;
13944 }
13945 else
13946 {
13947 for (j = 0; j < words; j++)
13948 {
13949 addr = plus_constant (Pmode, dst,
13950 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13951 mem = adjust_automodify_address (dstbase, SImode, addr,
13952 dstoffset + j * UNITS_PER_WORD);
13953 if (dst_aligned)
13954 emit_move_insn (mem, regs[j]);
13955 else
13956 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13957 }
13958 dstoffset += words * UNITS_PER_WORD;
13959 }
13960
13961 remaining -= words * UNITS_PER_WORD;
13962
13963 gcc_assert (remaining < 4);
13964
13965 /* Copy a halfword if necessary. */
13966
13967 if (remaining >= 2)
13968 {
13969 halfword_tmp = gen_reg_rtx (SImode);
13970
13971 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13972 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13973 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13974
13975 /* Either write out immediately, or delay until we've loaded the last
13976 byte, depending on interleave factor. */
13977 if (interleave_factor == 1)
13978 {
13979 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13980 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13981 emit_insn (gen_unaligned_storehi (mem,
13982 gen_lowpart (HImode, halfword_tmp)));
13983 halfword_tmp = NULL;
13984 dstoffset += 2;
13985 }
13986
13987 remaining -= 2;
13988 srcoffset += 2;
13989 }
13990
13991 gcc_assert (remaining < 2);
13992
13993 /* Copy last byte. */
13994
13995 if ((remaining & 1) != 0)
13996 {
13997 byte_tmp = gen_reg_rtx (SImode);
13998
13999 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14000 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14001 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14002
14003 if (interleave_factor == 1)
14004 {
14005 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14006 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14007 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14008 byte_tmp = NULL;
14009 dstoffset++;
14010 }
14011
14012 remaining--;
14013 srcoffset++;
14014 }
14015
14016 /* Store last halfword if we haven't done so already. */
14017
14018 if (halfword_tmp)
14019 {
14020 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14021 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14022 emit_insn (gen_unaligned_storehi (mem,
14023 gen_lowpart (HImode, halfword_tmp)));
14024 dstoffset += 2;
14025 }
14026
14027 /* Likewise for last byte. */
14028
14029 if (byte_tmp)
14030 {
14031 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14032 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14033 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14034 dstoffset++;
14035 }
14036
14037 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14038 }
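
/* Illustrative sketch only: with INTERLEAVE_FACTOR == 2, an aligned source
   and an unaligned destination, the first iteration of the main loop above
   copies 8 bytes roughly as
     ldmia  r_src!, {r0, r1}    @ load-multiple, source is word-aligned
     str    r0, [r_dst]         @ unaligned stores via gen_unaligned_storesi
     str    r1, [r_dst, #4]
   with any trailing halfword/byte handled by the tail code; r_src and r_dst
   are placeholder names for the address registers created above.  */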
14039
14040 /* From mips_adjust_block_mem:
14041
14042 Helper function for doing a loop-based block operation on memory
14043 reference MEM. Each iteration of the loop will operate on LENGTH
14044 bytes of MEM.
14045
14046 Create a new base register for use within the loop and point it to
14047 the start of MEM. Create a new memory reference that uses this
14048 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14049
14050 static void
14051 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14052 rtx *loop_mem)
14053 {
14054 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14055
14056 /* Although the new mem does not refer to a known location,
14057 it does keep up to LENGTH bytes of alignment. */
14058 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14059 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14060 }
14061
14062 /* From mips_block_move_loop:
14063
14064 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14065 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14066 the memory regions do not overlap. */
14067
14068 static void
14069 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14070 unsigned int interleave_factor,
14071 HOST_WIDE_INT bytes_per_iter)
14072 {
14073 rtx src_reg, dest_reg, final_src, test;
14074 HOST_WIDE_INT leftover;
14075
14076 leftover = length % bytes_per_iter;
14077 length -= leftover;
14078
14079 /* Create registers and memory references for use within the loop. */
14080 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14081 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14082
14083 /* Calculate the value that SRC_REG should have after the last iteration of
14084 the loop. */
14085 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14086 0, 0, OPTAB_WIDEN);
14087
14088 /* Emit the start of the loop. */
14089 rtx_code_label *label = gen_label_rtx ();
14090 emit_label (label);
14091
14092 /* Emit the loop body. */
14093 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14094 interleave_factor);
14095
14096 /* Move on to the next block. */
14097 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14098 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14099
14100 /* Emit the loop condition. */
14101 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14102 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14103
14104 /* Mop up any left-over bytes. */
14105 if (leftover)
14106 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14107 }
14108
14109 /* Emit a block move when either the source or destination is unaligned (not
14110 aligned to a four-byte boundary). This may need further tuning depending on
14111 core type, optimize_size setting, etc. */
14112
14113 static int
14114 arm_movmemqi_unaligned (rtx *operands)
14115 {
14116 HOST_WIDE_INT length = INTVAL (operands[2]);
14117
14118 if (optimize_size)
14119 {
14120 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14121 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14122 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14123 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14124 or dst_aligned though: allow more interleaving in those cases since the
14125 resulting code can be smaller. */
14126 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14127 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14128
14129 if (length > 12)
14130 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14131 interleave_factor, bytes_per_iter);
14132 else
14133 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14134 interleave_factor);
14135 }
14136 else
14137 {
14138 /* Note that the loop created by arm_block_move_unaligned_loop may be
14139 subject to loop unrolling, which makes tuning this condition a little
14140 redundant. */
14141 if (length > 32)
14142 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14143 else
14144 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14145 }
14146
14147 return 1;
14148 }
14149
14150 int
14151 arm_gen_movmemqi (rtx *operands)
14152 {
14153 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14154 HOST_WIDE_INT srcoffset, dstoffset;
14155 rtx src, dst, srcbase, dstbase;
14156 rtx part_bytes_reg = NULL;
14157 rtx mem;
14158
14159 if (!CONST_INT_P (operands[2])
14160 || !CONST_INT_P (operands[3])
14161 || INTVAL (operands[2]) > 64)
14162 return 0;
14163
14164 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14165 return arm_movmemqi_unaligned (operands);
14166
14167 if (INTVAL (operands[3]) & 3)
14168 return 0;
14169
14170 dstbase = operands[0];
14171 srcbase = operands[1];
14172
14173 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14174 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14175
14176 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14177 out_words_to_go = INTVAL (operands[2]) / 4;
14178 last_bytes = INTVAL (operands[2]) & 3;
14179 dstoffset = srcoffset = 0;
14180
14181 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14182 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14183
14184 while (in_words_to_go >= 2)
14185 {
14186 if (in_words_to_go > 4)
14187 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14188 TRUE, srcbase, &srcoffset));
14189 else
14190 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14191 src, FALSE, srcbase,
14192 &srcoffset));
14193
14194 if (out_words_to_go)
14195 {
14196 if (out_words_to_go > 4)
14197 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14198 TRUE, dstbase, &dstoffset));
14199 else if (out_words_to_go != 1)
14200 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14201 out_words_to_go, dst,
14202 (last_bytes == 0
14203 ? FALSE : TRUE),
14204 dstbase, &dstoffset));
14205 else
14206 {
14207 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14208 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14209 if (last_bytes != 0)
14210 {
14211 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14212 dstoffset += 4;
14213 }
14214 }
14215 }
14216
14217 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14218 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14219 }
14220
14221 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14222 if (out_words_to_go)
14223 {
14224 rtx sreg;
14225
14226 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14227 sreg = copy_to_reg (mem);
14228
14229 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14230 emit_move_insn (mem, sreg);
14231 in_words_to_go--;
14232
14233 gcc_assert (!in_words_to_go); /* Sanity check */
14234 }
14235
14236 if (in_words_to_go)
14237 {
14238 gcc_assert (in_words_to_go > 0);
14239
14240 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14241 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14242 }
14243
14244 gcc_assert (!last_bytes || part_bytes_reg);
14245
14246 if (BYTES_BIG_ENDIAN && last_bytes)
14247 {
14248 rtx tmp = gen_reg_rtx (SImode);
14249
14250 /* The bytes we want are in the top end of the word. */
14251 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14252 GEN_INT (8 * (4 - last_bytes))));
14253 part_bytes_reg = tmp;
14254
14255 while (last_bytes)
14256 {
14257 mem = adjust_automodify_address (dstbase, QImode,
14258 plus_constant (Pmode, dst,
14259 last_bytes - 1),
14260 dstoffset + last_bytes - 1);
14261 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14262
14263 if (--last_bytes)
14264 {
14265 tmp = gen_reg_rtx (SImode);
14266 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14267 part_bytes_reg = tmp;
14268 }
14269 }
14270
14271 }
14272 else
14273 {
14274 if (last_bytes > 1)
14275 {
14276 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14277 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14278 last_bytes -= 2;
14279 if (last_bytes)
14280 {
14281 rtx tmp = gen_reg_rtx (SImode);
14282 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14283 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14284 part_bytes_reg = tmp;
14285 dstoffset += 2;
14286 }
14287 }
14288
14289 if (last_bytes)
14290 {
14291 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14292 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14293 }
14294 }
14295
14296 return 1;
14297 }
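
/* A worked example of the splitting done by arm_gen_movmemqi (illustrative
   only): copying 14 bytes with word-aligned operands gives
   in_words_to_go == 4, out_words_to_go == 3 and last_bytes == 2.  Four words
   are fetched with a load-multiple, three are written back with a
   store-multiple, and the trailing halfword is stored from the fourth
   loaded register via part_bytes_reg.  */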
14298
14299 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14300 by mode size. */
14301 inline static rtx
14302 next_consecutive_mem (rtx mem)
14303 {
14304 machine_mode mode = GET_MODE (mem);
14305 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14306 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14307
14308 return adjust_automodify_address (mem, mode, addr, offset);
14309 }
14310
14311 /* Copy using LDRD/STRD instructions whenever possible.
14312 Returns true upon success. */
14313 bool
14314 gen_movmem_ldrd_strd (rtx *operands)
14315 {
14316 unsigned HOST_WIDE_INT len;
14317 HOST_WIDE_INT align;
14318 rtx src, dst, base;
14319 rtx reg0;
14320 bool src_aligned, dst_aligned;
14321 bool src_volatile, dst_volatile;
14322
14323 gcc_assert (CONST_INT_P (operands[2]));
14324 gcc_assert (CONST_INT_P (operands[3]));
14325
14326 len = UINTVAL (operands[2]);
14327 if (len > 64)
14328 return false;
14329
14330 /* Maximum alignment we can assume for both src and dst buffers. */
14331 align = INTVAL (operands[3]);
14332
14333 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14334 return false;
14335
14336 /* Place src and dst addresses in registers
14337 and update the corresponding mem rtx. */
14338 dst = operands[0];
14339 dst_volatile = MEM_VOLATILE_P (dst);
14340 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14341 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14342 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14343
14344 src = operands[1];
14345 src_volatile = MEM_VOLATILE_P (src);
14346 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14347 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14348 src = adjust_automodify_address (src, VOIDmode, base, 0);
14349
14350 if (!unaligned_access && !(src_aligned && dst_aligned))
14351 return false;
14352
14353 if (src_volatile || dst_volatile)
14354 return false;
14355
14356 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14357 if (!(dst_aligned || src_aligned))
14358 return arm_gen_movmemqi (operands);
14359
14360   /* If either the src or dst is unaligned we'll be accessing it as pairs
14361 of unaligned SImode accesses. Otherwise we can generate DImode
14362 ldrd/strd instructions. */
14363 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14364 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14365
14366 while (len >= 8)
14367 {
14368 len -= 8;
14369 reg0 = gen_reg_rtx (DImode);
14370 rtx low_reg = NULL_RTX;
14371 rtx hi_reg = NULL_RTX;
14372
14373 if (!src_aligned || !dst_aligned)
14374 {
14375 low_reg = gen_lowpart (SImode, reg0);
14376 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14377 }
14378 if (src_aligned)
14379 emit_move_insn (reg0, src);
14380 else
14381 {
14382 emit_insn (gen_unaligned_loadsi (low_reg, src));
14383 src = next_consecutive_mem (src);
14384 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14385 }
14386
14387 if (dst_aligned)
14388 emit_move_insn (dst, reg0);
14389 else
14390 {
14391 emit_insn (gen_unaligned_storesi (dst, low_reg));
14392 dst = next_consecutive_mem (dst);
14393 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14394 }
14395
14396 src = next_consecutive_mem (src);
14397 dst = next_consecutive_mem (dst);
14398 }
14399
14400 gcc_assert (len < 8);
14401 if (len >= 4)
14402 {
14403 /* More than a word but less than a double-word to copy. Copy a word. */
14404 reg0 = gen_reg_rtx (SImode);
14405 src = adjust_address (src, SImode, 0);
14406 dst = adjust_address (dst, SImode, 0);
14407 if (src_aligned)
14408 emit_move_insn (reg0, src);
14409 else
14410 emit_insn (gen_unaligned_loadsi (reg0, src));
14411
14412 if (dst_aligned)
14413 emit_move_insn (dst, reg0);
14414 else
14415 emit_insn (gen_unaligned_storesi (dst, reg0));
14416
14417 src = next_consecutive_mem (src);
14418 dst = next_consecutive_mem (dst);
14419 len -= 4;
14420 }
14421
14422 if (len == 0)
14423 return true;
14424
14425 /* Copy the remaining bytes. */
14426 if (len >= 2)
14427 {
14428 dst = adjust_address (dst, HImode, 0);
14429 src = adjust_address (src, HImode, 0);
14430 reg0 = gen_reg_rtx (SImode);
14431 if (src_aligned)
14432 emit_insn (gen_zero_extendhisi2 (reg0, src));
14433 else
14434 emit_insn (gen_unaligned_loadhiu (reg0, src));
14435
14436 if (dst_aligned)
14437 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14438 else
14439 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14440
14441 src = next_consecutive_mem (src);
14442 dst = next_consecutive_mem (dst);
14443 if (len == 2)
14444 return true;
14445 }
14446
14447 dst = adjust_address (dst, QImode, 0);
14448 src = adjust_address (src, QImode, 0);
14449 reg0 = gen_reg_rtx (QImode);
14450 emit_move_insn (reg0, src);
14451 emit_move_insn (dst, reg0);
14452 return true;
14453 }
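
/* Illustrative breakdown (a sketch, assuming both operands are word-aligned
   and the unaligned_access checks above pass): a 15-byte copy emits one
   DImode LDRD/STRD pair for the first 8 bytes, a word move for the next 4,
   a halfword move for the next 2 and a final byte move for the last byte.  */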
14454
14455 /* Select a dominance comparison mode if possible for a test of the general
14456 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14457 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14458 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14459 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14460 In all cases OP will be either EQ or NE, but we don't need to know which
14461 here. If we are unable to support a dominance comparison we return
14462 CC mode. This will then fail to match for the RTL expressions that
14463 generate this call. */
14464 machine_mode
14465 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14466 {
14467 enum rtx_code cond1, cond2;
14468 int swapped = 0;
14469
14470 /* Currently we will probably get the wrong result if the individual
14471 comparisons are not simple. This also ensures that it is safe to
14472 reverse a comparison if necessary. */
14473 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14474 != CCmode)
14475 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14476 != CCmode))
14477 return CCmode;
14478
14479 /* The if_then_else variant of this tests the second condition if the
14480 first passes, but is true if the first fails. Reverse the first
14481 condition to get a true "inclusive-or" expression. */
14482 if (cond_or == DOM_CC_NX_OR_Y)
14483 cond1 = reverse_condition (cond1);
14484
14485 /* If the comparisons are not equal, and one doesn't dominate the other,
14486 then we can't do this. */
14487 if (cond1 != cond2
14488 && !comparison_dominates_p (cond1, cond2)
14489 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14490 return CCmode;
14491
14492 if (swapped)
14493 std::swap (cond1, cond2);
14494
14495 switch (cond1)
14496 {
14497 case EQ:
14498 if (cond_or == DOM_CC_X_AND_Y)
14499 return CC_DEQmode;
14500
14501 switch (cond2)
14502 {
14503 case EQ: return CC_DEQmode;
14504 case LE: return CC_DLEmode;
14505 case LEU: return CC_DLEUmode;
14506 case GE: return CC_DGEmode;
14507 case GEU: return CC_DGEUmode;
14508 default: gcc_unreachable ();
14509 }
14510
14511 case LT:
14512 if (cond_or == DOM_CC_X_AND_Y)
14513 return CC_DLTmode;
14514
14515 switch (cond2)
14516 {
14517 case LT:
14518 return CC_DLTmode;
14519 case LE:
14520 return CC_DLEmode;
14521 case NE:
14522 return CC_DNEmode;
14523 default:
14524 gcc_unreachable ();
14525 }
14526
14527 case GT:
14528 if (cond_or == DOM_CC_X_AND_Y)
14529 return CC_DGTmode;
14530
14531 switch (cond2)
14532 {
14533 case GT:
14534 return CC_DGTmode;
14535 case GE:
14536 return CC_DGEmode;
14537 case NE:
14538 return CC_DNEmode;
14539 default:
14540 gcc_unreachable ();
14541 }
14542
14543 case LTU:
14544 if (cond_or == DOM_CC_X_AND_Y)
14545 return CC_DLTUmode;
14546
14547 switch (cond2)
14548 {
14549 case LTU:
14550 return CC_DLTUmode;
14551 case LEU:
14552 return CC_DLEUmode;
14553 case NE:
14554 return CC_DNEmode;
14555 default:
14556 gcc_unreachable ();
14557 }
14558
14559 case GTU:
14560 if (cond_or == DOM_CC_X_AND_Y)
14561 return CC_DGTUmode;
14562
14563 switch (cond2)
14564 {
14565 case GTU:
14566 return CC_DGTUmode;
14567 case GEU:
14568 return CC_DGEUmode;
14569 case NE:
14570 return CC_DNEmode;
14571 default:
14572 gcc_unreachable ();
14573 }
14574
14575 /* The remaining cases only occur when both comparisons are the
14576 same. */
14577 case NE:
14578 gcc_assert (cond1 == cond2);
14579 return CC_DNEmode;
14580
14581 case LE:
14582 gcc_assert (cond1 == cond2);
14583 return CC_DLEmode;
14584
14585 case GE:
14586 gcc_assert (cond1 == cond2);
14587 return CC_DGEmode;
14588
14589 case LEU:
14590 gcc_assert (cond1 == cond2);
14591 return CC_DLEUmode;
14592
14593 case GEU:
14594 gcc_assert (cond1 == cond2);
14595 return CC_DGEUmode;
14596
14597 default:
14598 gcc_unreachable ();
14599 }
14600 }
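
/* Example (illustrative): with cond1 == EQ, cond2 == GE and
   COND_OR == DOM_CC_X_OR_Y, EQ dominates GE (equality implies >=), so the
   switch above yields CC_DGEmode.  */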
14601
14602 machine_mode
14603 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14604 {
14605 /* All floating point compares return CCFP if it is an equality
14606 comparison, and CCFPE otherwise. */
14607 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14608 {
14609 switch (op)
14610 {
14611 case EQ:
14612 case NE:
14613 case UNORDERED:
14614 case ORDERED:
14615 case UNLT:
14616 case UNLE:
14617 case UNGT:
14618 case UNGE:
14619 case UNEQ:
14620 case LTGT:
14621 return CCFPmode;
14622
14623 case LT:
14624 case LE:
14625 case GT:
14626 case GE:
14627 return CCFPEmode;
14628
14629 default:
14630 gcc_unreachable ();
14631 }
14632 }
14633
14634 /* A compare with a shifted operand. Because of canonicalization, the
14635 comparison will have to be swapped when we emit the assembler. */
14636 if (GET_MODE (y) == SImode
14637 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14638 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14639 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14640 || GET_CODE (x) == ROTATERT))
14641 return CC_SWPmode;
14642
14643 /* This operation is performed swapped, but since we only rely on the Z
14644 flag we don't need an additional mode. */
14645 if (GET_MODE (y) == SImode
14646 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14647 && GET_CODE (x) == NEG
14648 && (op == EQ || op == NE))
14649 return CC_Zmode;
14650
14651 /* This is a special case that is used by combine to allow a
14652 comparison of a shifted byte load to be split into a zero-extend
14653 followed by a comparison of the shifted integer (only valid for
14654 equalities and unsigned inequalities). */
14655 if (GET_MODE (x) == SImode
14656 && GET_CODE (x) == ASHIFT
14657 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14658 && GET_CODE (XEXP (x, 0)) == SUBREG
14659 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14660 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14661 && (op == EQ || op == NE
14662 || op == GEU || op == GTU || op == LTU || op == LEU)
14663 && CONST_INT_P (y))
14664 return CC_Zmode;
14665
14666 /* A construct for a conditional compare, if the false arm contains
14667 0, then both conditions must be true, otherwise either condition
14668 must be true. Not all conditions are possible, so CCmode is
14669 returned if it can't be done. */
14670 if (GET_CODE (x) == IF_THEN_ELSE
14671 && (XEXP (x, 2) == const0_rtx
14672 || XEXP (x, 2) == const1_rtx)
14673 && COMPARISON_P (XEXP (x, 0))
14674 && COMPARISON_P (XEXP (x, 1)))
14675 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14676 INTVAL (XEXP (x, 2)));
14677
14678 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14679 if (GET_CODE (x) == AND
14680 && (op == EQ || op == NE)
14681 && COMPARISON_P (XEXP (x, 0))
14682 && COMPARISON_P (XEXP (x, 1)))
14683 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14684 DOM_CC_X_AND_Y);
14685
14686 if (GET_CODE (x) == IOR
14687 && (op == EQ || op == NE)
14688 && COMPARISON_P (XEXP (x, 0))
14689 && COMPARISON_P (XEXP (x, 1)))
14690 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14691 DOM_CC_X_OR_Y);
14692
14693 /* An operation (on Thumb) where we want to test for a single bit.
14694 This is done by shifting that bit up into the top bit of a
14695 scratch register; we can then branch on the sign bit. */
14696 if (TARGET_THUMB1
14697 && GET_MODE (x) == SImode
14698 && (op == EQ || op == NE)
14699 && GET_CODE (x) == ZERO_EXTRACT
14700 && XEXP (x, 1) == const1_rtx)
14701 return CC_Nmode;
14702
14703 /* An operation that sets the condition codes as a side-effect, the
14704 V flag is not set correctly, so we can only use comparisons where
14705 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14706 instead.) */
14707 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14708 if (GET_MODE (x) == SImode
14709 && y == const0_rtx
14710 && (op == EQ || op == NE || op == LT || op == GE)
14711 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14712 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14713 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14714 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14715 || GET_CODE (x) == LSHIFTRT
14716 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14717 || GET_CODE (x) == ROTATERT
14718 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14719 return CC_NOOVmode;
14720
14721 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14722 return CC_Zmode;
14723
14724 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14725 && GET_CODE (x) == PLUS
14726 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14727 return CC_Cmode;
14728
14729 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14730 {
14731 switch (op)
14732 {
14733 case EQ:
14734 case NE:
14735 /* A DImode comparison against zero can be implemented by
14736 or'ing the two halves together. */
14737 if (y == const0_rtx)
14738 return CC_Zmode;
14739
14740 /* We can do an equality test in three Thumb instructions. */
14741 if (!TARGET_32BIT)
14742 return CC_Zmode;
14743
14744 /* FALLTHROUGH */
14745
14746 case LTU:
14747 case LEU:
14748 case GTU:
14749 case GEU:
14750 /* DImode unsigned comparisons can be implemented by cmp +
14751 cmpeq without a scratch register. Not worth doing in
14752 Thumb-2. */
14753 if (TARGET_32BIT)
14754 return CC_CZmode;
14755
14756 /* FALLTHROUGH */
14757
14758 case LT:
14759 case LE:
14760 case GT:
14761 case GE:
14762 /* DImode signed and unsigned comparisons can be implemented
14763 by cmp + sbcs with a scratch register, but that does not
14764 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14765 gcc_assert (op != EQ && op != NE);
14766 return CC_NCVmode;
14767
14768 default:
14769 gcc_unreachable ();
14770 }
14771 }
14772
14773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14774 return GET_MODE (x);
14775
14776 return CCmode;
14777 }
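
/* A few illustrative selections made by arm_select_cc_mode:
   - a floating-point EQ compare returns CCFPmode, while LT/LE/GT/GE
     return CCFPEmode;
   - (EQ (plus (reg) (reg)) (const_int 0)) in SImode returns CC_NOOVmode;
   - a DImode EQ against a non-zero operand on a 32-bit target returns
     CC_CZmode.  */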
14778
14779 /* X and Y are two things to compare using CODE. Emit the compare insn and
14780    return the rtx for the CC register in the proper mode.  SCRATCH is a
14781    scratch register that may be needed for DImode comparisons after reload.  */
14782 rtx
14783 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14784 {
14785 machine_mode mode;
14786 rtx cc_reg;
14787 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14788
14789 /* We might have X as a constant, Y as a register because of the predicates
14790 used for cmpdi. If so, force X to a register here. */
14791 if (dimode_comparison && !REG_P (x))
14792 x = force_reg (DImode, x);
14793
14794 mode = SELECT_CC_MODE (code, x, y);
14795 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14796
14797 if (dimode_comparison
14798 && mode != CC_CZmode)
14799 {
14800 rtx clobber, set;
14801
14802 /* To compare two non-zero values for equality, XOR them and
14803 then compare against zero. Not used for ARM mode; there
14804 CC_CZmode is cheaper. */
14805 if (mode == CC_Zmode && y != const0_rtx)
14806 {
14807 gcc_assert (!reload_completed);
14808 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14809 y = const0_rtx;
14810 }
14811
14812 /* A scratch register is required. */
14813 if (reload_completed)
14814 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14815 else
14816 scratch = gen_rtx_SCRATCH (SImode);
14817
14818 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14819 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14820 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14821 }
14822 else
14823 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14824
14825 return cc_reg;
14826 }
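
/* Sketch of typical use (illustrative, not copied from the machine
   description): the returned CC register is subsequently tested against
   zero, e.g.

     rtx cc = arm_gen_compare_reg (code, x, y, NULL_RTX);
     rtx test = gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);

   and TEST is then used as the condition of a branch or conditional move.  */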
14827
14828 /* Generate a sequence of insns that will generate the correct return
14829 address mask depending on the physical architecture that the program
14830 is running on. */
14831 rtx
14832 arm_gen_return_addr_mask (void)
14833 {
14834 rtx reg = gen_reg_rtx (Pmode);
14835
14836 emit_insn (gen_return_addr_mask (reg));
14837 return reg;
14838 }
14839
14840 void
14841 arm_reload_in_hi (rtx *operands)
14842 {
14843 rtx ref = operands[1];
14844 rtx base, scratch;
14845 HOST_WIDE_INT offset = 0;
14846
14847 if (GET_CODE (ref) == SUBREG)
14848 {
14849 offset = SUBREG_BYTE (ref);
14850 ref = SUBREG_REG (ref);
14851 }
14852
14853 if (REG_P (ref))
14854 {
14855 /* We have a pseudo which has been spilt onto the stack; there
14856 are two cases here: the first where there is a simple
14857 stack-slot replacement and a second where the stack-slot is
14858 out of range, or is used as a subreg. */
14859 if (reg_equiv_mem (REGNO (ref)))
14860 {
14861 ref = reg_equiv_mem (REGNO (ref));
14862 base = find_replacement (&XEXP (ref, 0));
14863 }
14864 else
14865 /* The slot is out of range, or was dressed up in a SUBREG. */
14866 base = reg_equiv_address (REGNO (ref));
14867
14868 /* PR 62554: If there is no equivalent memory location then just move
14869 the value as an SImode register move. This happens when the target
14870 architecture variant does not have an HImode register move. */
14871 if (base == NULL)
14872 {
14873 gcc_assert (REG_P (operands[0]));
14874 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14875 gen_rtx_SUBREG (SImode, ref, 0)));
14876 return;
14877 }
14878 }
14879 else
14880 base = find_replacement (&XEXP (ref, 0));
14881
14882 /* Handle the case where the address is too complex to be offset by 1. */
14883 if (GET_CODE (base) == MINUS
14884 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14885 {
14886 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14887
14888 emit_set_insn (base_plus, base);
14889 base = base_plus;
14890 }
14891 else if (GET_CODE (base) == PLUS)
14892 {
14893 /* The addend must be CONST_INT, or we would have dealt with it above. */
14894 HOST_WIDE_INT hi, lo;
14895
14896 offset += INTVAL (XEXP (base, 1));
14897 base = XEXP (base, 0);
14898
14899 /* Rework the address into a legal sequence of insns. */
14900 /* Valid range for lo is -4095 -> 4095 */
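      /* Worked example (illustrative): offset == 5000 (0x1388) splits into
	 lo == 0x388 (904) and hi == 4096; offset == -5000 splits into
	 lo == -904 and hi == -4096.  In both cases hi + lo == offset.  */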
14901 lo = (offset >= 0
14902 ? (offset & 0xfff)
14903 : -((-offset) & 0xfff));
14904
14905 /* Corner case, if lo is the max offset then we would be out of range
14906 once we have added the additional 1 below, so bump the msb into the
14907 pre-loading insn(s). */
14908 if (lo == 4095)
14909 lo &= 0x7ff;
14910
14911 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14912 ^ (HOST_WIDE_INT) 0x80000000)
14913 - (HOST_WIDE_INT) 0x80000000);
14914
14915 gcc_assert (hi + lo == offset);
14916
14917 if (hi != 0)
14918 {
14919 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14920
14921 /* Get the base address; addsi3 knows how to handle constants
14922 that require more than one insn. */
14923 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14924 base = base_plus;
14925 offset = lo;
14926 }
14927 }
14928
14929 /* Operands[2] may overlap operands[0] (though it won't overlap
14930 operands[1]), that's why we asked for a DImode reg -- so we can
14931 use the bit that does not overlap. */
14932 if (REGNO (operands[2]) == REGNO (operands[0]))
14933 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14934 else
14935 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14936
14937 emit_insn (gen_zero_extendqisi2 (scratch,
14938 gen_rtx_MEM (QImode,
14939 plus_constant (Pmode, base,
14940 offset))));
14941 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14942 gen_rtx_MEM (QImode,
14943 plus_constant (Pmode, base,
14944 offset + 1))));
14945 if (!BYTES_BIG_ENDIAN)
14946 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14947 gen_rtx_IOR (SImode,
14948 gen_rtx_ASHIFT
14949 (SImode,
14950 gen_rtx_SUBREG (SImode, operands[0], 0),
14951 GEN_INT (8)),
14952 scratch));
14953 else
14954 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14955 gen_rtx_IOR (SImode,
14956 gen_rtx_ASHIFT (SImode, scratch,
14957 GEN_INT (8)),
14958 gen_rtx_SUBREG (SImode, operands[0], 0)));
14959 }
14960
14961 /* Handle storing a half-word to memory during reload by synthesizing as two
14962 byte stores. Take care not to clobber the input values until after we
14963 have moved them somewhere safe. This code assumes that if the DImode
14964 scratch in operands[2] overlaps either the input value or output address
14965 in some way, then that value must die in this insn (we absolutely need
14966 two scratch registers for some corner cases). */
14967 void
14968 arm_reload_out_hi (rtx *operands)
14969 {
14970 rtx ref = operands[0];
14971 rtx outval = operands[1];
14972 rtx base, scratch;
14973 HOST_WIDE_INT offset = 0;
14974
14975 if (GET_CODE (ref) == SUBREG)
14976 {
14977 offset = SUBREG_BYTE (ref);
14978 ref = SUBREG_REG (ref);
14979 }
14980
14981 if (REG_P (ref))
14982 {
14983 /* We have a pseudo which has been spilt onto the stack; there
14984 are two cases here: the first where there is a simple
14985 stack-slot replacement and a second where the stack-slot is
14986 out of range, or is used as a subreg. */
14987 if (reg_equiv_mem (REGNO (ref)))
14988 {
14989 ref = reg_equiv_mem (REGNO (ref));
14990 base = find_replacement (&XEXP (ref, 0));
14991 }
14992 else
14993 /* The slot is out of range, or was dressed up in a SUBREG. */
14994 base = reg_equiv_address (REGNO (ref));
14995
14996 /* PR 62254: If there is no equivalent memory location then just move
14997 the value as an SImode register move. This happens when the target
14998 architecture variant does not have an HImode register move. */
14999 if (base == NULL)
15000 {
15001 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15002
15003 if (REG_P (outval))
15004 {
15005 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15006 gen_rtx_SUBREG (SImode, outval, 0)));
15007 }
15008 else /* SUBREG_P (outval) */
15009 {
15010 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15011 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15012 SUBREG_REG (outval)));
15013 else
15014 /* FIXME: Handle other cases ? */
15015 gcc_unreachable ();
15016 }
15017 return;
15018 }
15019 }
15020 else
15021 base = find_replacement (&XEXP (ref, 0));
15022
15023 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15024
15025 /* Handle the case where the address is too complex to be offset by 1. */
15026 if (GET_CODE (base) == MINUS
15027 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15028 {
15029 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15030
15031 /* Be careful not to destroy OUTVAL. */
15032 if (reg_overlap_mentioned_p (base_plus, outval))
15033 {
15034 /* Updating base_plus might destroy outval, see if we can
15035 swap the scratch and base_plus. */
15036 if (!reg_overlap_mentioned_p (scratch, outval))
15037 std::swap (scratch, base_plus);
15038 else
15039 {
15040 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15041
15042 /* Be conservative and copy OUTVAL into the scratch now,
15043 this should only be necessary if outval is a subreg
15044 of something larger than a word. */
15045 /* XXX Might this clobber base? I can't see how it can,
15046 since scratch is known to overlap with OUTVAL, and
15047 must be wider than a word. */
15048 emit_insn (gen_movhi (scratch_hi, outval));
15049 outval = scratch_hi;
15050 }
15051 }
15052
15053 emit_set_insn (base_plus, base);
15054 base = base_plus;
15055 }
15056 else if (GET_CODE (base) == PLUS)
15057 {
15058 /* The addend must be CONST_INT, or we would have dealt with it above. */
15059 HOST_WIDE_INT hi, lo;
15060
15061 offset += INTVAL (XEXP (base, 1));
15062 base = XEXP (base, 0);
15063
15064 /* Rework the address into a legal sequence of insns. */
15065 /* Valid range for lo is -4095 -> 4095 */
15066 lo = (offset >= 0
15067 ? (offset & 0xfff)
15068 : -((-offset) & 0xfff));
15069
15070 /* Corner case, if lo is the max offset then we would be out of range
15071 once we have added the additional 1 below, so bump the msb into the
15072 pre-loading insn(s). */
15073 if (lo == 4095)
15074 lo &= 0x7ff;
15075
15076 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15077 ^ (HOST_WIDE_INT) 0x80000000)
15078 - (HOST_WIDE_INT) 0x80000000);
15079
15080 gcc_assert (hi + lo == offset);
15081
15082 if (hi != 0)
15083 {
15084 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15085
15086 /* Be careful not to destroy OUTVAL. */
15087 if (reg_overlap_mentioned_p (base_plus, outval))
15088 {
15089 /* Updating base_plus might destroy outval, see if we
15090 can swap the scratch and base_plus. */
15091 if (!reg_overlap_mentioned_p (scratch, outval))
15092 std::swap (scratch, base_plus);
15093 else
15094 {
15095 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15096
15097 /* Be conservative and copy outval into scratch now,
15098 this should only be necessary if outval is a
15099 subreg of something larger than a word. */
15100 /* XXX Might this clobber base? I can't see how it
15101 can, since scratch is known to overlap with
15102 outval. */
15103 emit_insn (gen_movhi (scratch_hi, outval));
15104 outval = scratch_hi;
15105 }
15106 }
15107
15108 /* Get the base address; addsi3 knows how to handle constants
15109 that require more than one insn. */
15110 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15111 base = base_plus;
15112 offset = lo;
15113 }
15114 }
15115
15116 if (BYTES_BIG_ENDIAN)
15117 {
15118 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15119 plus_constant (Pmode, base,
15120 offset + 1)),
15121 gen_lowpart (QImode, outval)));
15122 emit_insn (gen_lshrsi3 (scratch,
15123 gen_rtx_SUBREG (SImode, outval, 0),
15124 GEN_INT (8)));
15125 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15126 offset)),
15127 gen_lowpart (QImode, scratch)));
15128 }
15129 else
15130 {
15131 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15132 offset)),
15133 gen_lowpart (QImode, outval)));
15134 emit_insn (gen_lshrsi3 (scratch,
15135 gen_rtx_SUBREG (SImode, outval, 0),
15136 GEN_INT (8)));
15137 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15138 plus_constant (Pmode, base,
15139 offset + 1)),
15140 gen_lowpart (QImode, scratch)));
15141 }
15142 }
15143
15144 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15145 (padded to the size of a word) should be passed in a register. */
15146
15147 static bool
15148 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15149 {
15150 if (TARGET_AAPCS_BASED)
15151 return must_pass_in_stack_var_size (mode, type);
15152 else
15153 return must_pass_in_stack_var_size_or_pad (mode, type);
15154 }
15155
15156
15157 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15158 byte of a stack argument has useful data. For legacy APCS ABIs we use
15159 the default. For AAPCS based ABIs small aggregate types are placed
15160 in the lowest memory address. */
15161
15162 static pad_direction
15163 arm_function_arg_padding (machine_mode mode, const_tree type)
15164 {
15165 if (!TARGET_AAPCS_BASED)
15166 return default_function_arg_padding (mode, type);
15167
15168 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15169 return PAD_DOWNWARD;
15170
15171 return PAD_UPWARD;
15172 }
15173
15174
15175 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15176 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15177 register has useful data, and return the opposite if the most
15178 significant byte does. */
15179
15180 bool
15181 arm_pad_reg_upward (machine_mode mode,
15182 tree type, int first ATTRIBUTE_UNUSED)
15183 {
15184 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15185 {
15186 /* For AAPCS, small aggregates, small fixed-point types,
15187 and small complex types are always padded upwards. */
15188 if (type)
15189 {
15190 if ((AGGREGATE_TYPE_P (type)
15191 || TREE_CODE (type) == COMPLEX_TYPE
15192 || FIXED_POINT_TYPE_P (type))
15193 && int_size_in_bytes (type) <= 4)
15194 return true;
15195 }
15196 else
15197 {
15198 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15199 && GET_MODE_SIZE (mode) <= 4)
15200 return true;
15201 }
15202 }
15203
15204 /* Otherwise, use default padding. */
15205 return !BYTES_BIG_ENDIAN;
15206 }
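
/* For example (illustrative): on a big-endian AAPCS target a 3-byte
   structure is padded upwards (true is returned), whereas a plain short
   integer falls through to the default and returns !BYTES_BIG_ENDIAN,
   i.e. false.  */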
15207
15208 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15209 assuming that the address in the base register is word aligned. */
15210 bool
15211 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15212 {
15213 HOST_WIDE_INT max_offset;
15214
15215 /* Offset must be a multiple of 4 in Thumb mode. */
15216 if (TARGET_THUMB2 && ((offset & 3) != 0))
15217 return false;
15218
15219 if (TARGET_THUMB2)
15220 max_offset = 1020;
15221 else if (TARGET_ARM)
15222 max_offset = 255;
15223 else
15224 return false;
15225
15226 return ((offset <= max_offset) && (offset >= -max_offset));
15227 }
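
/* For example (illustrative): in Thumb-2, offsets 1020 and -1020 are
   accepted, 1022 is rejected (not a multiple of 4) and 1024 is rejected
   (out of range); in ARM state the magnitude limit is 255.  */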
15228
15229 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15230 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15231 Assumes that the address in the base register RN is word aligned. Pattern
15232 guarantees that both memory accesses use the same base register,
15233 the offsets are constants within the range, and the gap between the offsets is 4.
15234    If reload is complete then check that the registers are legal.  WBACK indicates whether
15235 address is updated. LOAD indicates whether memory access is load or store. */
15236 bool
15237 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15238 bool wback, bool load)
15239 {
15240 unsigned int t, t2, n;
15241
15242 if (!reload_completed)
15243 return true;
15244
15245 if (!offset_ok_for_ldrd_strd (offset))
15246 return false;
15247
15248 t = REGNO (rt);
15249 t2 = REGNO (rt2);
15250 n = REGNO (rn);
15251
15252 if ((TARGET_THUMB2)
15253 && ((wback && (n == t || n == t2))
15254 || (t == SP_REGNUM)
15255 || (t == PC_REGNUM)
15256 || (t2 == SP_REGNUM)
15257 || (t2 == PC_REGNUM)
15258 || (!load && (n == PC_REGNUM))
15259 || (load && (t == t2))
15260 /* Triggers Cortex-M3 LDRD errata. */
15261 || (!wback && load && fix_cm3_ldrd && (n == t))))
15262 return false;
15263
15264 if ((TARGET_ARM)
15265 && ((wback && (n == t || n == t2))
15266 || (t2 == PC_REGNUM)
15267 || (t % 2 != 0) /* First destination register is not even. */
15268 || (t2 != t + 1)
15269 /* PC can be used as base register (for offset addressing only),
15270           but it is deprecated.  */
15271 || (n == PC_REGNUM)))
15272 return false;
15273
15274 return true;
15275 }
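
/* For example (illustrative, assuming reload has completed, the offset is in
   range and the base register is otherwise legal): in ARM state an LDRD with
   RT == r4 and RT2 == r5 is accepted, while RT == r5, RT2 == r6 is rejected
   because the first destination register must be even and the pair
   consecutive.  */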
15276
15277 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15278 operand MEM's address contains an immediate offset from the base
15279 register and has no side effects, in which case it sets BASE and
15280 OFFSET accordingly. */
15281 static bool
15282 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15283 {
15284 rtx addr;
15285
15286 gcc_assert (base != NULL && offset != NULL);
15287
15288 /* TODO: Handle more general memory operand patterns, such as
15289 PRE_DEC and PRE_INC. */
15290
15291 if (side_effects_p (mem))
15292 return false;
15293
15294 /* Can't deal with subregs. */
15295 if (GET_CODE (mem) == SUBREG)
15296 return false;
15297
15298 gcc_assert (MEM_P (mem));
15299
15300 *offset = const0_rtx;
15301
15302 addr = XEXP (mem, 0);
15303
15304 /* If addr isn't valid for DImode, then we can't handle it. */
15305 if (!arm_legitimate_address_p (DImode, addr,
15306 reload_in_progress || reload_completed))
15307 return false;
15308
15309 if (REG_P (addr))
15310 {
15311 *base = addr;
15312 return true;
15313 }
15314 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15315 {
15316 *base = XEXP (addr, 0);
15317 *offset = XEXP (addr, 1);
15318 return (REG_P (*base) && CONST_INT_P (*offset));
15319 }
15320
15321 return false;
15322 }
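
/* For example (illustrative): a MEM whose address is
   (plus (reg Rn) (const_int 8)) yields *BASE == Rn and *OFFSET == 8, while
   any auto-modify address is rejected by the side_effects_p check above.  */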
15323
15324 /* Called from a peephole2 to replace two word-size accesses with a
15325 single LDRD/STRD instruction. Returns true iff we can generate a
15326 new instruction sequence. That is, both accesses use the same base
15327 register and the gap between constant offsets is 4. This function
15328 may reorder its operands to match ldrd/strd RTL templates.
15329 OPERANDS are the operands found by the peephole matcher;
15330 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15331    corresponding memory operands.  LOAD indicates whether the access
15332 is load or store. CONST_STORE indicates a store of constant
15333 integer values held in OPERANDS[4,5] and assumes that the pattern
15334    is four insns long, for the purpose of checking dead registers.
15335 COMMUTE indicates that register operands may be reordered. */
15336 bool
15337 gen_operands_ldrd_strd (rtx *operands, bool load,
15338 bool const_store, bool commute)
15339 {
15340 int nops = 2;
15341 HOST_WIDE_INT offsets[2], offset;
15342 rtx base = NULL_RTX;
15343 rtx cur_base, cur_offset, tmp;
15344 int i, gap;
15345 HARD_REG_SET regset;
15346
15347 gcc_assert (!const_store || !load);
15348 /* Check that the memory references are immediate offsets from the
15349 same base register. Extract the base register, the destination
15350 registers, and the corresponding memory offsets. */
15351 for (i = 0; i < nops; i++)
15352 {
15353 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15354 return false;
15355
15356 if (i == 0)
15357 base = cur_base;
15358 else if (REGNO (base) != REGNO (cur_base))
15359 return false;
15360
15361 offsets[i] = INTVAL (cur_offset);
15362 if (GET_CODE (operands[i]) == SUBREG)
15363 {
15364 tmp = SUBREG_REG (operands[i]);
15365 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15366 operands[i] = tmp;
15367 }
15368 }
15369
15370 /* Make sure there is no dependency between the individual loads. */
15371 if (load && REGNO (operands[0]) == REGNO (base))
15372 return false; /* RAW */
15373
15374 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15375 return false; /* WAW */
15376
15377 /* If the same input register is used in both stores
15378 when storing different constants, try to find a free register.
15379 For example, the code
15380 mov r0, 0
15381 str r0, [r2]
15382 mov r0, 1
15383 str r0, [r2, #4]
15384 can be transformed into
15385 mov r1, 0
15386 mov r0, 1
15387 strd r1, r0, [r2]
15388 in Thumb mode assuming that r1 is free.
15389 For ARM mode do the same but only if the starting register
15390 can be made to be even. */
15391 if (const_store
15392 && REGNO (operands[0]) == REGNO (operands[1])
15393 && INTVAL (operands[4]) != INTVAL (operands[5]))
15394 {
15395 if (TARGET_THUMB2)
15396 {
15397 CLEAR_HARD_REG_SET (regset);
15398 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15399 if (tmp == NULL_RTX)
15400 return false;
15401
15402 /* Use the new register in the first load to ensure that
15403 if the original input register is not dead after peephole,
15404 then it will have the correct constant value. */
15405 operands[0] = tmp;
15406 }
15407 else if (TARGET_ARM)
15408 {
15409 int regno = REGNO (operands[0]);
15410 if (!peep2_reg_dead_p (4, operands[0]))
15411 {
15412 /* When the input register is even and is not dead after the
15413 pattern, it has to hold the second constant but we cannot
15414 form a legal STRD in ARM mode with this register as the second
15415 register. */
15416 if (regno % 2 == 0)
15417 return false;
15418
15419 /* Is regno-1 free? */
15420 SET_HARD_REG_SET (regset);
15421 CLEAR_HARD_REG_BIT(regset, regno - 1);
15422 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15423 if (tmp == NULL_RTX)
15424 return false;
15425
15426 operands[0] = tmp;
15427 }
15428 else
15429 {
15430 /* Find a DImode register. */
15431 CLEAR_HARD_REG_SET (regset);
15432 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15433 if (tmp != NULL_RTX)
15434 {
15435 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15436 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15437 }
15438 else
15439 {
15440 /* Can we use the input register to form a DI register? */
15441 SET_HARD_REG_SET (regset);
15442 CLEAR_HARD_REG_BIT(regset,
15443 regno % 2 == 0 ? regno + 1 : regno - 1);
15444 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15445 if (tmp == NULL_RTX)
15446 return false;
15447 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15448 }
15449 }
15450
15451 gcc_assert (operands[0] != NULL_RTX);
15452 gcc_assert (operands[1] != NULL_RTX);
15453 gcc_assert (REGNO (operands[0]) % 2 == 0);
15454 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15455 }
15456 }
15457
15458 /* Make sure the instructions are ordered with lower memory access first. */
15459 if (offsets[0] > offsets[1])
15460 {
15461 gap = offsets[0] - offsets[1];
15462 offset = offsets[1];
15463
15464 /* Swap the instructions such that lower memory is accessed first. */
15465 std::swap (operands[0], operands[1]);
15466 std::swap (operands[2], operands[3]);
15467 if (const_store)
15468 std::swap (operands[4], operands[5]);
15469 }
15470 else
15471 {
15472 gap = offsets[1] - offsets[0];
15473 offset = offsets[0];
15474 }
15475
15476 /* Make sure accesses are to consecutive memory locations. */
15477 if (gap != 4)
15478 return false;
15479
15480 /* Make sure we generate legal instructions. */
15481 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15482 false, load))
15483 return true;
15484
15485 /* In Thumb state, where registers are almost unconstrained, there
15486      is little hope of fixing it.  */
15487 if (TARGET_THUMB2)
15488 return false;
15489
15490 if (load && commute)
15491 {
15492 /* Try reordering registers. */
15493 std::swap (operands[0], operands[1]);
15494 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15495 false, load))
15496 return true;
15497 }
15498
15499 if (const_store)
15500 {
15501 /* If input registers are dead after this pattern, they can be
15502 reordered or replaced by other registers that are free in the
15503 current pattern. */
15504 if (!peep2_reg_dead_p (4, operands[0])
15505 || !peep2_reg_dead_p (4, operands[1]))
15506 return false;
15507
15508 /* Try to reorder the input registers. */
15509 /* For example, the code
15510 mov r0, 0
15511 mov r1, 1
15512 str r1, [r2]
15513 str r0, [r2, #4]
15514 can be transformed into
15515 mov r1, 0
15516 mov r0, 1
15517 strd r0, [r2]
15518 */
15519 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15520 false, false))
15521 {
15522 std::swap (operands[0], operands[1]);
15523 return true;
15524 }
15525
15526 /* Try to find a free DI register. */
15527 CLEAR_HARD_REG_SET (regset);
15528 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15529 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15530 while (true)
15531 {
15532 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15533 if (tmp == NULL_RTX)
15534 return false;
15535
15536 /* DREG must be an even-numbered register in DImode.
15537 Split it into SI registers. */
15538 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15539 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15540 gcc_assert (operands[0] != NULL_RTX);
15541 gcc_assert (operands[1] != NULL_RTX);
15542 gcc_assert (REGNO (operands[0]) % 2 == 0);
15543 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15544
15545 return (operands_ok_ldrd_strd (operands[0], operands[1],
15546 base, offset,
15547 false, load));
15548 }
15549 }
15550
15551 return false;
15552 }
15553
15554
15555
15556 \f
15557 /* Print a symbolic form of X to the debug file, F. */
15558 static void
15559 arm_print_value (FILE *f, rtx x)
15560 {
15561 switch (GET_CODE (x))
15562 {
15563 case CONST_INT:
15564 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15565 return;
15566
15567 case CONST_DOUBLE:
15568 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15569 return;
15570
15571 case CONST_VECTOR:
15572 {
15573 int i;
15574
15575 fprintf (f, "<");
15576 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15577 {
15578 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15579 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15580 fputc (',', f);
15581 }
15582 fprintf (f, ">");
15583 }
15584 return;
15585
15586 case CONST_STRING:
15587 fprintf (f, "\"%s\"", XSTR (x, 0));
15588 return;
15589
15590 case SYMBOL_REF:
15591 fprintf (f, "`%s'", XSTR (x, 0));
15592 return;
15593
15594 case LABEL_REF:
15595 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15596 return;
15597
15598 case CONST:
15599 arm_print_value (f, XEXP (x, 0));
15600 return;
15601
15602 case PLUS:
15603 arm_print_value (f, XEXP (x, 0));
15604 fprintf (f, "+");
15605 arm_print_value (f, XEXP (x, 1));
15606 return;
15607
15608 case PC:
15609 fprintf (f, "pc");
15610 return;
15611
15612 default:
15613 fprintf (f, "????");
15614 return;
15615 }
15616 }
15617 \f
15618 /* Routines for manipulation of the constant pool. */
15619
15620 /* Arm instructions cannot load a large constant directly into a
15621 register; they have to come from a pc relative load. The constant
15622 must therefore be placed in the addressable range of the pc
15623 relative load. Depending on the precise pc relative load
15624 instruction the range is somewhere between 256 bytes and 4k. This
15625 means that we often have to dump a constant inside a function, and
15626 generate code to branch around it.
15627
15628 It is important to minimize this, since the branches will slow
15629 things down and make the code larger.
15630
15631 Normally we can hide the table after an existing unconditional
15632 branch so that there is no interruption of the flow, but in the
15633 worst case the code looks like this:
15634
15635 ldr rn, L1
15636 ...
15637 b L2
15638 align
15639 L1: .long value
15640 L2:
15641 ...
15642
15643 ldr rn, L3
15644 ...
15645 b L4
15646 align
15647 L3: .long value
15648 L4:
15649 ...
15650
15651 We fix this by performing a scan after scheduling, which notices
15652 which instructions need to have their operands fetched from the
15653 constant table and builds the table.
15654
15655 The algorithm starts by building a table of all the constants that
15656 need fixing up and all the natural barriers in the function (places
15657 where a constant table can be dropped without breaking the flow).
15658 For each fixup we note how far the pc-relative replacement will be
15659 able to reach and the offset of the instruction into the function.
15660
15661 Having built the table we then group the fixes together to form
15662 tables that are as large as possible (subject to addressing
15663 constraints) and emit each table of constants after the last
15664 barrier that is within range of all the instructions in the group.
15665 If a group does not contain a barrier, then we forcibly create one
15666 by inserting a jump instruction into the flow. Once the table has
15667 been inserted, the insns are then modified to reference the
15668 relevant entry in the pool.
15669
15670 Possible enhancements to the algorithm (not implemented) are:
15671
15672 1) For some processors and object formats, there may be benefit in
15673 aligning the pools to the start of cache lines; this alignment
15674 would need to be taken into account when calculating addressability
15675 of a pool. */
15676
15677 /* These typedefs are located at the start of this file, so that
15678 they can be used in the prototypes there. This comment is to
15679 remind readers of that fact so that the following structures
15680 can be understood more easily.
15681
15682 typedef struct minipool_node Mnode;
15683 typedef struct minipool_fixup Mfix; */
15684
15685 struct minipool_node
15686 {
15687 /* Doubly linked chain of entries. */
15688 Mnode * next;
15689 Mnode * prev;
15690 /* The maximum offset into the code that this entry can be placed. While
15691 pushing fixes for forward references, all entries are sorted in order
15692 of increasing max_address. */
15693 HOST_WIDE_INT max_address;
15694 /* Similarly for an entry inserted for a backwards ref. */
15695 HOST_WIDE_INT min_address;
15696 /* The number of fixes referencing this entry. This can become zero
15697 if we "unpush" an entry. In this case we ignore the entry when we
15698 come to emit the code. */
15699 int refcount;
15700 /* The offset from the start of the minipool. */
15701 HOST_WIDE_INT offset;
15702 /* The value in table. */
15703 rtx value;
15704 /* The mode of value. */
15705 machine_mode mode;
15706 /* The size of the value. With iWMMXt enabled
15707      sizes > 4 also imply an alignment of 8 bytes.  */
15708 int fix_size;
15709 };
15710
15711 struct minipool_fixup
15712 {
15713 Mfix * next;
15714 rtx_insn * insn;
15715 HOST_WIDE_INT address;
15716 rtx * loc;
15717 machine_mode mode;
15718 int fix_size;
15719 rtx value;
15720 Mnode * minipool;
15721 HOST_WIDE_INT forwards;
15722 HOST_WIDE_INT backwards;
15723 };
15724
15725 /* Fixes less than a word need padding out to a word boundary. */
15726 #define MINIPOOL_FIX_SIZE(mode) \
15727 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
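
/* For example, MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */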
15728
15729 static Mnode * minipool_vector_head;
15730 static Mnode * minipool_vector_tail;
15731 static rtx_code_label *minipool_vector_label;
15732 static int minipool_pad;
15733
15734 /* The linked list of all minipool fixes required for this function. */
15735 Mfix * minipool_fix_head;
15736 Mfix * minipool_fix_tail;
15737 /* The fix entry for the current minipool, once it has been placed. */
15738 Mfix * minipool_barrier;
15739
15740 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15741 #define JUMP_TABLES_IN_TEXT_SECTION 0
15742 #endif
15743
15744 static HOST_WIDE_INT
15745 get_jump_table_size (rtx_jump_table_data *insn)
15746 {
15747   /* ADDR_VECs only take room if read-only data goes into the text
15748 section. */
15749 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15750 {
15751 rtx body = PATTERN (insn);
15752 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15753 HOST_WIDE_INT size;
15754 HOST_WIDE_INT modesize;
15755
15756 modesize = GET_MODE_SIZE (GET_MODE (body));
15757 size = modesize * XVECLEN (body, elt);
15758 switch (modesize)
15759 {
15760 case 1:
15761 /* Round up size of TBB table to a halfword boundary. */
15762 size = (size + 1) & ~HOST_WIDE_INT_1;
15763 break;
15764 case 2:
15765 /* No padding necessary for TBH. */
15766 break;
15767 case 4:
15768 /* Add two bytes for alignment on Thumb. */
15769 if (TARGET_THUMB)
15770 size += 2;
15771 break;
15772 default:
15773 gcc_unreachable ();
15774 }
15775 return size;
15776 }
15777
15778 return 0;
15779 }
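
/* Worked examples (illustrative): a QImode ADDR_DIFF_VEC (a TBB table) with
   7 entries occupies 7 bytes, rounded up to 8; an SImode table with 3
   entries occupies 12 bytes, plus 2 bytes of Thumb alignment padding.  */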
15780
15781 /* Return the maximum amount of padding that will be inserted before
15782 label LABEL. */
15783
15784 static HOST_WIDE_INT
15785 get_label_padding (rtx label)
15786 {
15787 HOST_WIDE_INT align, min_insn_size;
15788
15789 align = 1 << label_to_alignment (label);
15790 min_insn_size = TARGET_THUMB ? 2 : 4;
15791 return align > min_insn_size ? align - min_insn_size : 0;
15792 }
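
/* For example, a label aligned to 8 bytes can be preceded by up to 6 bytes
   of padding in Thumb code (min_insn_size == 2) or 4 bytes in ARM code.  */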
15793
15794 /* Move a minipool fix MP from its current location to before MAX_MP.
15795 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15796 constraints may need updating. */
15797 static Mnode *
15798 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15799 HOST_WIDE_INT max_address)
15800 {
15801 /* The code below assumes these are different. */
15802 gcc_assert (mp != max_mp);
15803
15804 if (max_mp == NULL)
15805 {
15806 if (max_address < mp->max_address)
15807 mp->max_address = max_address;
15808 }
15809 else
15810 {
15811 if (max_address > max_mp->max_address - mp->fix_size)
15812 mp->max_address = max_mp->max_address - mp->fix_size;
15813 else
15814 mp->max_address = max_address;
15815
15816 /* Unlink MP from its current position. Since max_mp is non-null,
15817 mp->prev must be non-null. */
15818 mp->prev->next = mp->next;
15819 if (mp->next != NULL)
15820 mp->next->prev = mp->prev;
15821 else
15822 minipool_vector_tail = mp->prev;
15823
15824 /* Re-insert it before MAX_MP. */
15825 mp->next = max_mp;
15826 mp->prev = max_mp->prev;
15827 max_mp->prev = mp;
15828
15829 if (mp->prev != NULL)
15830 mp->prev->next = mp;
15831 else
15832 minipool_vector_head = mp;
15833 }
15834
15835 /* Save the new entry. */
15836 max_mp = mp;
15837
15838 /* Scan over the preceding entries and adjust their addresses as
15839 required. */
15840 while (mp->prev != NULL
15841 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15842 {
15843 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15844 mp = mp->prev;
15845 }
15846
15847 return max_mp;
15848 }
15849
15850 /* Add a constant to the minipool for a forward reference. Returns the
15851 node added or NULL if the constant will not fit in this pool. */
15852 static Mnode *
15853 add_minipool_forward_ref (Mfix *fix)
15854 {
15855 /* If set, max_mp is the first pool_entry that has a lower
15856 constraint than the one we are trying to add. */
15857 Mnode * max_mp = NULL;
15858 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15859 Mnode * mp;
15860
15861 /* If the minipool starts before the end of FIX->INSN then this FIX
15862      cannot be placed into the current pool.  Furthermore, adding the
15863 new constant pool entry may cause the pool to start FIX_SIZE bytes
15864 earlier. */
15865 if (minipool_vector_head &&
15866 (fix->address + get_attr_length (fix->insn)
15867 >= minipool_vector_head->max_address - fix->fix_size))
15868 return NULL;
15869
15870 /* Scan the pool to see if a constant with the same value has
15871 already been added. While we are doing this, also note the
15872 location where we must insert the constant if it doesn't already
15873 exist. */
15874 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15875 {
15876 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15877 && fix->mode == mp->mode
15878 && (!LABEL_P (fix->value)
15879 || (CODE_LABEL_NUMBER (fix->value)
15880 == CODE_LABEL_NUMBER (mp->value)))
15881 && rtx_equal_p (fix->value, mp->value))
15882 {
15883 /* More than one fix references this entry. */
15884 mp->refcount++;
15885 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15886 }
15887
15888 /* Note the insertion point if necessary. */
15889 if (max_mp == NULL
15890 && mp->max_address > max_address)
15891 max_mp = mp;
15892
15893       /* If we are inserting an 8-byte aligned quantity and
15894 we have not already found an insertion point, then
15895 make sure that all such 8-byte aligned quantities are
15896 placed at the start of the pool. */
15897 if (ARM_DOUBLEWORD_ALIGN
15898 && max_mp == NULL
15899 && fix->fix_size >= 8
15900 && mp->fix_size < 8)
15901 {
15902 max_mp = mp;
15903 max_address = mp->max_address;
15904 }
15905 }
15906
15907 /* The value is not currently in the minipool, so we need to create
15908 a new entry for it. If MAX_MP is NULL, the entry will be put on
15909 the end of the list since the placement is less constrained than
15910 any existing entry. Otherwise, we insert the new fix before
15911 MAX_MP and, if necessary, adjust the constraints on the other
15912 entries. */
15913 mp = XNEW (Mnode);
15914 mp->fix_size = fix->fix_size;
15915 mp->mode = fix->mode;
15916 mp->value = fix->value;
15917 mp->refcount = 1;
15918 /* Not yet required for a backwards ref. */
15919 mp->min_address = -65536;
15920
15921 if (max_mp == NULL)
15922 {
15923 mp->max_address = max_address;
15924 mp->next = NULL;
15925 mp->prev = minipool_vector_tail;
15926
15927 if (mp->prev == NULL)
15928 {
15929 minipool_vector_head = mp;
15930 minipool_vector_label = gen_label_rtx ();
15931 }
15932 else
15933 mp->prev->next = mp;
15934
15935 minipool_vector_tail = mp;
15936 }
15937 else
15938 {
15939 if (max_address > max_mp->max_address - mp->fix_size)
15940 mp->max_address = max_mp->max_address - mp->fix_size;
15941 else
15942 mp->max_address = max_address;
15943
15944 mp->next = max_mp;
15945 mp->prev = max_mp->prev;
15946 max_mp->prev = mp;
15947 if (mp->prev != NULL)
15948 mp->prev->next = mp;
15949 else
15950 minipool_vector_head = mp;
15951 }
15952
15953 /* Save the new entry. */
15954 max_mp = mp;
15955
15956 /* Scan over the preceding entries and adjust their addresses as
15957 required. */
15958 while (mp->prev != NULL
15959 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15960 {
15961 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15962 mp = mp->prev;
15963 }
15964
15965 return max_mp;
15966 }
15967
15968 static Mnode *
15969 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15970 HOST_WIDE_INT min_address)
15971 {
15972 HOST_WIDE_INT offset;
15973
15974 /* The code below assumes these are different. */
15975 gcc_assert (mp != min_mp);
15976
15977 if (min_mp == NULL)
15978 {
15979 if (min_address > mp->min_address)
15980 mp->min_address = min_address;
15981 }
15982 else
15983 {
15984 /* We will adjust this below if it is too loose. */
15985 mp->min_address = min_address;
15986
15987 /* Unlink MP from its current position. Since min_mp is non-null,
15988 mp->next must be non-null. */
15989 mp->next->prev = mp->prev;
15990 if (mp->prev != NULL)
15991 mp->prev->next = mp->next;
15992 else
15993 minipool_vector_head = mp->next;
15994
15995 /* Reinsert it after MIN_MP. */
15996 mp->prev = min_mp;
15997 mp->next = min_mp->next;
15998 min_mp->next = mp;
15999 if (mp->next != NULL)
16000 mp->next->prev = mp;
16001 else
16002 minipool_vector_tail = mp;
16003 }
16004
16005 min_mp = mp;
16006
16007 offset = 0;
16008 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16009 {
16010 mp->offset = offset;
16011 if (mp->refcount > 0)
16012 offset += mp->fix_size;
16013
16014 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16015 mp->next->min_address = mp->min_address + mp->fix_size;
16016 }
16017
16018 return min_mp;
16019 }
16020
16021 /* Add a constant to the minipool for a backward reference. Returns the
16022 node added or NULL if the constant will not fit in this pool.
16023
16024 Note that the code for insertion for a backwards reference can be
16025 somewhat confusing because the calculated offsets for each fix do
16026 not take into account the size of the pool (which is still under
16027 construction).  */
16028 static Mnode *
16029 add_minipool_backward_ref (Mfix *fix)
16030 {
16031 /* If set, min_mp is the last pool_entry that has a lower constraint
16032 than the one we are trying to add. */
16033 Mnode *min_mp = NULL;
16034 /* This can be negative, since it is only a constraint. */
16035 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16036 Mnode *mp;
16037
16038 /* If we can't reach the current pool from this insn, or if we can't
16039 insert this entry at the end of the pool without pushing other
16040 fixes out of range, then we don't try. This ensures that we
16041 can't fail later on. */
16042 if (min_address >= minipool_barrier->address
16043 || (minipool_vector_tail->min_address + fix->fix_size
16044 >= minipool_barrier->address))
16045 return NULL;
16046
16047 /* Scan the pool to see if a constant with the same value has
16048 already been added. While we are doing this, also note the
16049 location where we must insert the constant if it doesn't already
16050 exist. */
16051 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16052 {
16053 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16054 && fix->mode == mp->mode
16055 && (!LABEL_P (fix->value)
16056 || (CODE_LABEL_NUMBER (fix->value)
16057 == CODE_LABEL_NUMBER (mp->value)))
16058 && rtx_equal_p (fix->value, mp->value)
16059 /* Check that there is enough slack to move this entry to the
16060 end of the table (this is conservative). */
16061 && (mp->max_address
16062 > (minipool_barrier->address
16063 + minipool_vector_tail->offset
16064 + minipool_vector_tail->fix_size)))
16065 {
16066 mp->refcount++;
16067 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16068 }
16069
16070 if (min_mp != NULL)
16071 mp->min_address += fix->fix_size;
16072 else
16073 {
16074 /* Note the insertion point if necessary. */
16075 if (mp->min_address < min_address)
16076 {
16077 /* For now, we do not allow the insertion of nodes requiring
16078 8-byte alignment anywhere but at the start of the pool. */
16079 if (ARM_DOUBLEWORD_ALIGN
16080 && fix->fix_size >= 8 && mp->fix_size < 8)
16081 return NULL;
16082 else
16083 min_mp = mp;
16084 }
16085 else if (mp->max_address
16086 < minipool_barrier->address + mp->offset + fix->fix_size)
16087 {
16088 /* Inserting before this entry would push the fix beyond
16089 its maximum address (which can happen if we have
16090 re-located a forwards fix); force the new fix to come
16091 after it. */
16092 if (ARM_DOUBLEWORD_ALIGN
16093 && fix->fix_size >= 8 && mp->fix_size < 8)
16094 return NULL;
16095 else
16096 {
16097 min_mp = mp;
16098 min_address = mp->min_address + fix->fix_size;
16099 }
16100 }
16101 /* Do not insert a non-8-byte aligned quantity before 8-byte
16102 aligned quantities. */
16103 else if (ARM_DOUBLEWORD_ALIGN
16104 && fix->fix_size < 8
16105 && mp->fix_size >= 8)
16106 {
16107 min_mp = mp;
16108 min_address = mp->min_address + fix->fix_size;
16109 }
16110 }
16111 }
16112
16113 /* We need to create a new entry. */
16114 mp = XNEW (Mnode);
16115 mp->fix_size = fix->fix_size;
16116 mp->mode = fix->mode;
16117 mp->value = fix->value;
16118 mp->refcount = 1;
16119 mp->max_address = minipool_barrier->address + 65536;
16120
16121 mp->min_address = min_address;
16122
16123 if (min_mp == NULL)
16124 {
16125 mp->prev = NULL;
16126 mp->next = minipool_vector_head;
16127
16128 if (mp->next == NULL)
16129 {
16130 minipool_vector_tail = mp;
16131 minipool_vector_label = gen_label_rtx ();
16132 }
16133 else
16134 mp->next->prev = mp;
16135
16136 minipool_vector_head = mp;
16137 }
16138 else
16139 {
16140 mp->next = min_mp->next;
16141 mp->prev = min_mp;
16142 min_mp->next = mp;
16143
16144 if (mp->next != NULL)
16145 mp->next->prev = mp;
16146 else
16147 minipool_vector_tail = mp;
16148 }
16149
16150 /* Save the new entry. */
16151 min_mp = mp;
16152
16153 if (mp->prev)
16154 mp = mp->prev;
16155 else
16156 mp->offset = 0;
16157
16158 /* Scan over the following entries and adjust their offsets. */
16159 while (mp->next != NULL)
16160 {
16161 if (mp->next->min_address < mp->min_address + mp->fix_size)
16162 mp->next->min_address = mp->min_address + mp->fix_size;
16163
16164 if (mp->refcount)
16165 mp->next->offset = mp->offset + mp->fix_size;
16166 else
16167 mp->next->offset = mp->offset;
16168
16169 mp = mp->next;
16170 }
16171
16172 return min_mp;
16173 }
16174
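/* Record BARRIER as the barrier after which the current minipool will be
   emitted and assign every pool entry its byte offset from the start of
   the pool; entries whose refcount has dropped to zero take no space.  */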
16175 static void
16176 assign_minipool_offsets (Mfix *barrier)
16177 {
16178 HOST_WIDE_INT offset = 0;
16179 Mnode *mp;
16180
16181 minipool_barrier = barrier;
16182
16183 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16184 {
16185 mp->offset = offset;
16186
16187 if (mp->refcount > 0)
16188 offset += mp->fix_size;
16189 }
16190 }
16191
16192 /* Output the literal table.  */
16193 static void
16194 dump_minipool (rtx_insn *scan)
16195 {
16196 Mnode * mp;
16197 Mnode * nmp;
16198 int align64 = 0;
16199
16200 if (ARM_DOUBLEWORD_ALIGN)
16201 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16202 if (mp->refcount > 0 && mp->fix_size >= 8)
16203 {
16204 align64 = 1;
16205 break;
16206 }
16207
16208 if (dump_file)
16209 fprintf (dump_file,
16210 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16211 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16212
16213 scan = emit_label_after (gen_label_rtx (), scan);
16214 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16215 scan = emit_label_after (minipool_vector_label, scan);
16216
16217 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16218 {
16219 if (mp->refcount > 0)
16220 {
16221 if (dump_file)
16222 {
16223 fprintf (dump_file,
16224 ";; Offset %u, min %ld, max %ld ",
16225 (unsigned) mp->offset, (long) mp->min_address,
16226 (long) mp->max_address);
16227 arm_print_value (dump_file, mp->value);
16228 fputc ('\n', dump_file);
16229 }
16230
16231 rtx val = copy_rtx (mp->value);
16232
16233 switch (GET_MODE_SIZE (mp->mode))
16234 {
16235 #ifdef HAVE_consttable_1
16236 case 1:
16237 scan = emit_insn_after (gen_consttable_1 (val), scan);
16238 break;
16239
16240 #endif
16241 #ifdef HAVE_consttable_2
16242 case 2:
16243 scan = emit_insn_after (gen_consttable_2 (val), scan);
16244 break;
16245
16246 #endif
16247 #ifdef HAVE_consttable_4
16248 case 4:
16249 scan = emit_insn_after (gen_consttable_4 (val), scan);
16250 break;
16251
16252 #endif
16253 #ifdef HAVE_consttable_8
16254 case 8:
16255 scan = emit_insn_after (gen_consttable_8 (val), scan);
16256 break;
16257
16258 #endif
16259 #ifdef HAVE_consttable_16
16260 case 16:
16261 scan = emit_insn_after (gen_consttable_16 (val), scan);
16262 break;
16263
16264 #endif
16265 default:
16266 gcc_unreachable ();
16267 }
16268 }
16269
16270 nmp = mp->next;
16271 free (mp);
16272 }
16273
16274 minipool_vector_head = minipool_vector_tail = NULL;
16275 scan = emit_insn_after (gen_consttable_end (), scan);
16276 scan = emit_barrier_after (scan);
16277 }
16278
16279 /* Return the cost of forcibly inserting a barrier after INSN. */
16280 static int
16281 arm_barrier_cost (rtx_insn *insn)
16282 {
16283 /* Basing the location of the pool on the loop depth is preferable,
16284 but at the moment, the basic block information seems to be
16285 corrupted by this stage of the compilation. */
16286 int base_cost = 50;
16287 rtx_insn *next = next_nonnote_insn (insn);
16288
16289 if (next != NULL && LABEL_P (next))
16290 base_cost -= 20;
16291
16292 switch (GET_CODE (insn))
16293 {
16294 case CODE_LABEL:
16295 /* It will always be better to place the table before the label, rather
16296 than after it. */
16297 return 50;
16298
16299 case INSN:
16300 case CALL_INSN:
16301 return base_cost;
16302
16303 case JUMP_INSN:
16304 return base_cost - 10;
16305
16306 default:
16307 return base_cost + 10;
16308 }
16309 }
16310
16311 /* Find the best place in the insn stream in the range
16312 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16313 Create the barrier by inserting a jump and add a new fix entry for
16314 it. */
16315 static Mfix *
16316 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16317 {
16318 HOST_WIDE_INT count = 0;
16319 rtx_barrier *barrier;
16320 rtx_insn *from = fix->insn;
16321 /* The instruction after which we will insert the jump. */
16322 rtx_insn *selected = NULL;
16323 int selected_cost;
16324 /* The address at which the jump instruction will be placed. */
16325 HOST_WIDE_INT selected_address;
16326 Mfix * new_fix;
16327 HOST_WIDE_INT max_count = max_address - fix->address;
16328 rtx_code_label *label = gen_label_rtx ();
16329
16330 selected_cost = arm_barrier_cost (from);
16331 selected_address = fix->address;
16332
16333 while (from && count < max_count)
16334 {
16335 rtx_jump_table_data *tmp;
16336 int new_cost;
16337
16338 /* This code shouldn't have been called if there was a natural barrier
16339 within range. */
16340 gcc_assert (!BARRIER_P (from));
16341
16342 /* Count the length of this insn. This must stay in sync with the
16343 code that pushes minipool fixes. */
16344 if (LABEL_P (from))
16345 count += get_label_padding (from);
16346 else
16347 count += get_attr_length (from);
16348
16349 /* If there is a jump table, add its length. */
16350 if (tablejump_p (from, NULL, &tmp))
16351 {
16352 count += get_jump_table_size (tmp);
16353
16354 /* Jump tables aren't in a basic block, so base the cost on
16355 the dispatch insn. If we select this location, we will
16356 still put the pool after the table. */
16357 new_cost = arm_barrier_cost (from);
16358
16359 if (count < max_count
16360 && (!selected || new_cost <= selected_cost))
16361 {
16362 selected = tmp;
16363 selected_cost = new_cost;
16364 selected_address = fix->address + count;
16365 }
16366
16367 /* Continue after the dispatch table. */
16368 from = NEXT_INSN (tmp);
16369 continue;
16370 }
16371
16372 new_cost = arm_barrier_cost (from);
16373
16374 if (count < max_count
16375 && (!selected || new_cost <= selected_cost))
16376 {
16377 selected = from;
16378 selected_cost = new_cost;
16379 selected_address = fix->address + count;
16380 }
16381
16382 from = NEXT_INSN (from);
16383 }
16384
16385 /* Make sure that we found a place to insert the jump. */
16386 gcc_assert (selected);
16387
16388 /* Make sure we do not split a call and its corresponding
16389 CALL_ARG_LOCATION note. */
16390 if (CALL_P (selected))
16391 {
16392 rtx_insn *next = NEXT_INSN (selected);
16393 if (next && NOTE_P (next)
16394 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16395 selected = next;
16396 }
16397
16398 /* Create a new JUMP_INSN that branches around a barrier. */
16399 from = emit_jump_insn_after (gen_jump (label), selected);
16400 JUMP_LABEL (from) = label;
16401 barrier = emit_barrier_after (from);
16402 emit_label_after (label, barrier);
16403
16404 /* Create a minipool barrier entry for the new barrier. */
16405 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16406 new_fix->insn = barrier;
16407 new_fix->address = selected_address;
16408 new_fix->next = fix->next;
16409 fix->next = new_fix;
16410
16411 return new_fix;
16412 }
16413
16414 /* Record that there is a natural barrier in the insn stream at
16415 ADDRESS. */
16416 static void
16417 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16418 {
16419 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16420
16421 fix->insn = insn;
16422 fix->address = address;
16423
16424 fix->next = NULL;
16425 if (minipool_fix_head != NULL)
16426 minipool_fix_tail->next = fix;
16427 else
16428 minipool_fix_head = fix;
16429
16430 minipool_fix_tail = fix;
16431 }
16432
16433 /* Record INSN, which will need fixing up to load a value from the
16434 minipool. ADDRESS is the offset of the insn since the start of the
16435 function; LOC is a pointer to the part of the insn which requires
16436 fixing; VALUE is the constant that must be loaded, which is of type
16437 MODE. */
16438 static void
16439 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16440 machine_mode mode, rtx value)
16441 {
16442 gcc_assert (!arm_disable_literal_pool);
16443 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16444
16445 fix->insn = insn;
16446 fix->address = address;
16447 fix->loc = loc;
16448 fix->mode = mode;
16449 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16450 fix->value = value;
16451 fix->forwards = get_attr_pool_range (insn);
16452 fix->backwards = get_attr_neg_pool_range (insn);
16453 fix->minipool = NULL;
16454
16455 /* If an insn doesn't have a range defined for it, then it isn't
16456 expecting to be reworked by this code. Better to stop now than
16457 to generate duff assembly code. */
16458 gcc_assert (fix->forwards || fix->backwards);
16459
16460 /* If an entry requires 8-byte alignment then assume all constant pools
16461 require 4 bytes of padding. Trying to do this later on a per-pool
16462 basis is awkward because existing pool entries have to be modified. */
16463 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16464 minipool_pad = 4;
16465
16466 if (dump_file)
16467 {
16468 fprintf (dump_file,
16469 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16470 GET_MODE_NAME (mode),
16471 INSN_UID (insn), (unsigned long) address,
16472 -1 * (long)fix->backwards, (long)fix->forwards);
16473 arm_print_value (dump_file, fix->value);
16474 fprintf (dump_file, "\n");
16475 }
16476
16477 /* Add it to the chain of fixes. */
16478 fix->next = NULL;
16479
16480 if (minipool_fix_head != NULL)
16481 minipool_fix_tail->next = fix;
16482 else
16483 minipool_fix_head = fix;
16484
16485 minipool_fix_tail = fix;
16486 }
16487
16488 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline.
16489 Returns the number of insns needed, or 99 if we always want to synthesize
16490 the value. */
16491 int
16492 arm_max_const_double_inline_cost ()
16493 {
16494 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16495 }
16496
16497 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16498 Returns the number of insns needed, or 99 if we don't know how to
16499 do it. */
16500 int
16501 arm_const_double_inline_cost (rtx val)
16502 {
16503 rtx lowpart, highpart;
16504 machine_mode mode;
16505
16506 mode = GET_MODE (val);
16507
16508 if (mode == VOIDmode)
16509 mode = DImode;
16510
16511 gcc_assert (GET_MODE_SIZE (mode) == 8);
16512
16513 lowpart = gen_lowpart (SImode, val);
16514 highpart = gen_highpart_mode (SImode, mode, val);
16515
16516 gcc_assert (CONST_INT_P (lowpart));
16517 gcc_assert (CONST_INT_P (highpart));
16518
16519 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16520 NULL_RTX, NULL_RTX, 0, 0)
16521 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16522 NULL_RTX, NULL_RTX, 0, 0));
16523 }
16524
16525 /* Cost of loading a SImode constant. */
16526 static inline int
16527 arm_const_inline_cost (enum rtx_code code, rtx val)
16528 {
16529 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16530 NULL_RTX, NULL_RTX, 1, 0);
16531 }
16532
16533 /* Return true if it is worthwhile to split a 64-bit constant into two
16534 32-bit operations. This is the case if optimizing for size, or
16535 if we have load delay slots, or if one 32-bit part can be done with
16536 a single data operation. */
16537 bool
16538 arm_const_double_by_parts (rtx val)
16539 {
16540 machine_mode mode = GET_MODE (val);
16541 rtx part;
16542
16543 if (optimize_size || arm_ld_sched)
16544 return true;
16545
16546 if (mode == VOIDmode)
16547 mode = DImode;
16548
16549 part = gen_highpart_mode (SImode, mode, val);
16550
16551 gcc_assert (CONST_INT_P (part));
16552
16553 if (const_ok_for_arm (INTVAL (part))
16554 || const_ok_for_arm (~INTVAL (part)))
16555 return true;
16556
16557 part = gen_lowpart (SImode, val);
16558
16559 gcc_assert (CONST_INT_P (part));
16560
16561 if (const_ok_for_arm (INTVAL (part))
16562 || const_ok_for_arm (~INTVAL (part)))
16563 return true;
16564
16565 return false;
16566 }
16567
16568 /* Return true if it is possible to inline both the high and low parts
16569 of a 64-bit constant into 32-bit data processing instructions. */
16570 bool
16571 arm_const_double_by_immediates (rtx val)
16572 {
16573 machine_mode mode = GET_MODE (val);
16574 rtx part;
16575
16576 if (mode == VOIDmode)
16577 mode = DImode;
16578
16579 part = gen_highpart_mode (SImode, mode, val);
16580
16581 gcc_assert (CONST_INT_P (part));
16582
16583 if (!const_ok_for_arm (INTVAL (part)))
16584 return false;
16585
16586 part = gen_lowpart (SImode, val);
16587
16588 gcc_assert (CONST_INT_P (part));
16589
16590 if (!const_ok_for_arm (INTVAL (part)))
16591 return false;
16592
16593 return true;
16594 }
16595
16596 /* Scan INSN and note any of its operands that need fixing.
16597 If DO_PUSHES is false we do not actually push any of the fixups
16598 needed. */
16599 static void
16600 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16601 {
16602 int opno;
16603
16604 extract_constrain_insn (insn);
16605
16606 if (recog_data.n_alternatives == 0)
16607 return;
16608
16609 /* Fill in recog_op_alt with information about the constraints of
16610 this insn. */
16611 preprocess_constraints (insn);
16612
16613 const operand_alternative *op_alt = which_op_alt ();
16614 for (opno = 0; opno < recog_data.n_operands; opno++)
16615 {
16616 /* Things we need to fix can only occur in inputs. */
16617 if (recog_data.operand_type[opno] != OP_IN)
16618 continue;
16619
16620 /* If this alternative is a memory reference, then any mention
16621 of constants in this alternative is really to fool reload
16622 into allowing us to accept one there. We need to fix them up
16623 now so that we output the right code. */
16624 if (op_alt[opno].memory_ok)
16625 {
16626 rtx op = recog_data.operand[opno];
16627
16628 if (CONSTANT_P (op))
16629 {
16630 if (do_pushes)
16631 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16632 recog_data.operand_mode[opno], op);
16633 }
16634 else if (MEM_P (op)
16635 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16636 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16637 {
16638 if (do_pushes)
16639 {
16640 rtx cop = avoid_constant_pool_reference (op);
16641
16642 /* Casting the address of something to a mode narrower
16643 than a word can cause avoid_constant_pool_reference()
16644 to return the pool reference itself. That's no good to
16645 us here. Let's just hope that we can use the
16646 constant pool value directly. */
16647 if (op == cop)
16648 cop = get_pool_constant (XEXP (op, 0));
16649
16650 push_minipool_fix (insn, address,
16651 recog_data.operand_loc[opno],
16652 recog_data.operand_mode[opno], cop);
16653 }
16654
16655 }
16656 }
16657 }
16658
16659 return;
16660 }
16661
16662 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16663 and unions in the context of ARMv8-M Security Extensions. It is used as a
16664 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16665 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16666 or four masks, depending on whether it is being computed for a
16667 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16668 respectively. The tree for the type of the argument, or of a field within an
16669 argument, is passed in ARG_TYPE. The register this argument or field starts
16670 in is kept in the pointer REGNO and updated accordingly. The bit this
16671 argument or field starts at is passed in STARTING_BIT, and the last used bit
16672 is kept in LAST_USED_BIT, which is also updated accordingly. */
16673
16674 static unsigned HOST_WIDE_INT
16675 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16676 uint32_t * padding_bits_to_clear,
16677 unsigned starting_bit, int * last_used_bit)
16678
16679 {
16680 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16681
16682 if (TREE_CODE (arg_type) == RECORD_TYPE)
16683 {
16684 unsigned current_bit = starting_bit;
16685 tree field;
16686 long int offset, size;
16687
16688
16689 field = TYPE_FIELDS (arg_type);
16690 while (field)
16691 {
16692 /* The offset within a structure is always an offset from
16693 the start of that structure. Make sure we take that into the
16694 calculation of the register based offset that we use here. */
16695 offset = starting_bit;
16696 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16697 offset %= 32;
16698
16699 /* This is the actual size of the field, for bitfields this is the
16700 bitfield width and not the container size. */
16701 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16702
16703 if (*last_used_bit != offset)
16704 {
16705 if (offset < *last_used_bit)
16706 {
16707 /* This field's offset is before the 'last_used_bit', which
16708 means this field goes in the next register. So we need to
16709 pad the rest of the current register and increase the
16710 register number. */
16711 uint32_t mask;
16712 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16713 mask++;
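/* MASK now has bits [*last_used_bit, 31] set: everything from the
   last used bit to the top of the current register is padding.  */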
16714
16715 padding_bits_to_clear[*regno] |= mask;
16716 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16717 (*regno)++;
16718 }
16719 else
16720 {
16721 /* Otherwise we pad the bits between the last field's end and
16722 the start of the new field. */
16723 uint32_t mask;
16724
16725 mask = ((uint32_t)-1) >> (32 - offset);
16726 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
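/* MASK now has bits [*last_used_bit, offset - 1] set: the gap between
   the end of the previous field and the start of this one is padding.  */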
16727 padding_bits_to_clear[*regno] |= mask;
16728 }
16729 current_bit = offset;
16730 }
16731
16732 /* Calculate further padding bits for inner structs/unions too. */
16733 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16734 {
16735 *last_used_bit = current_bit;
16736 not_to_clear_reg_mask
16737 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16738 padding_bits_to_clear, offset,
16739 last_used_bit);
16740 }
16741 else
16742 {
16743 /* Update 'current_bit' with this field's size. If the
16744 'current_bit' lies in a subsequent register, update 'regno' and
16745 reset 'current_bit' to point to the current bit in that new
16746 register. */
16747 current_bit += size;
16748 while (current_bit >= 32)
16749 {
16750 current_bit-=32;
16751 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16752 (*regno)++;
16753 }
16754 *last_used_bit = current_bit;
16755 }
16756
16757 field = TREE_CHAIN (field);
16758 }
16759 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16760 }
16761 else if (TREE_CODE (arg_type) == UNION_TYPE)
16762 {
16763 tree field, field_t;
16764 int i, regno_t, field_size;
16765 int max_reg = -1;
16766 int max_bit = -1;
16767 uint32_t mask;
16768 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16769 = {-1, -1, -1, -1};
16770
16771 /* To compute the padding bits in a union we only consider bits as
16772 padding bits if they are always either a padding bit or fall outside a
16773 field's size for all fields in the union. */
16774 field = TYPE_FIELDS (arg_type);
16775 while (field)
16776 {
16777 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16778 = {0U, 0U, 0U, 0U};
16779 int last_used_bit_t = *last_used_bit;
16780 regno_t = *regno;
16781 field_t = TREE_TYPE (field);
16782
16783 /* If the field's type is either a record or a union, make sure to
16784 compute its padding bits too. */
16785 if (RECORD_OR_UNION_TYPE_P (field_t))
16786 not_to_clear_reg_mask
16787 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16788 &padding_bits_to_clear_t[0],
16789 starting_bit, &last_used_bit_t);
16790 else
16791 {
16792 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16793 regno_t = (field_size / 32) + *regno;
16794 last_used_bit_t = (starting_bit + field_size) % 32;
16795 }
16796
16797 for (i = *regno; i < regno_t; i++)
16798 {
16799 /* For all but the last register used by this field, only keep the
16800 padding bits that were padding bits in this field. */
16801 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16802 }
16803
16804 /* For the last register, keep all padding bits that were padding
16805 bits in this field and any padding bits that are still valid
16806 as padding bits but fall outside of this field's size. */
16807 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
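/* MASK has bits [last_used_bit_t, 31] set: the unused tail of the
   last register touched by this field.  */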
16808 padding_bits_to_clear_res[regno_t]
16809 &= padding_bits_to_clear_t[regno_t] | mask;
16810
16811 /* Update the maximum size of the fields in terms of registers used
16812 ('max_reg') and the 'last_used_bit' in said register. */
16813 if (max_reg < regno_t)
16814 {
16815 max_reg = regno_t;
16816 max_bit = last_used_bit_t;
16817 }
16818 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16819 max_bit = last_used_bit_t;
16820
16821 field = TREE_CHAIN (field);
16822 }
16823
16824 /* Update the current padding_bits_to_clear using the intersection of the
16825 padding bits of all the fields. */
16826 for (i=*regno; i < max_reg; i++)
16827 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16828
16829 /* Do not keep trailing padding bits; we do not know yet whether this
16830 is the end of the argument. */
16831 mask = ((uint32_t) 1 << max_bit) - 1;
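/* MASK has bits [0, max_bit - 1] set; bits at or above max_bit are the
   trailing bits mentioned above and are dropped here.  */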
16832 padding_bits_to_clear[max_reg]
16833 |= padding_bits_to_clear_res[max_reg] & mask;
16834
16835 *regno = max_reg;
16836 *last_used_bit = max_bit;
16837 }
16838 else
16839 /* This function should only be used for structs and unions. */
16840 gcc_unreachable ();
16841
16842 return not_to_clear_reg_mask;
16843 }
16844
16845 /* In the context of ARMv8-M Security Extensions, this function is used for both
16846 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16847 registers are used when returning or passing arguments, which is then
16848 returned as a mask. It also computes a mask to indicate padding/unused
16849 bits for each of these registers, and passes this back through the
16850 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16851 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16852 the starting register used to pass this argument or return value is passed
16853 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16854 for struct and union types. */
16855
16856 static unsigned HOST_WIDE_INT
16857 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16858 uint32_t * padding_bits_to_clear)
16859
16860 {
16861 int last_used_bit = 0;
16862 unsigned HOST_WIDE_INT not_to_clear_mask;
16863
16864 if (RECORD_OR_UNION_TYPE_P (arg_type))
16865 {
16866 not_to_clear_mask
16867 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16868 padding_bits_to_clear, 0,
16869 &last_used_bit);
16870
16871
16872 /* If the 'last_used_bit' is not zero, that means we are still using a
16873 part of the last 'regno'. In such cases we must clear the trailing
16874 bits. Otherwise we are not using regno and we should mark it
16875 to be cleared. */
16876 if (last_used_bit != 0)
16877 padding_bits_to_clear[regno]
16878 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16879 else
16880 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16881 }
16882 else
16883 {
16884 not_to_clear_mask = 0;
16885 /* We are not dealing with structs or unions, so these arguments may be
16886 passed in floating point registers too. In some cases a BLKmode is
16887 used when returning or passing arguments in multiple VFP registers. */
16888 if (GET_MODE (arg_rtx) == BLKmode)
16889 {
16890 int i, arg_regs;
16891 rtx reg;
16892
16893 /* This should really only occur when dealing with the hard-float
16894 ABI. */
16895 gcc_assert (TARGET_HARD_FLOAT_ABI);
16896
16897 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16898 {
16899 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16900 gcc_assert (REG_P (reg));
16901
16902 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16903
16904 /* If we are dealing with DF mode, make sure we don't
16905 clear either of the registers it addresses. */
16906 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16907 if (arg_regs > 1)
16908 {
16909 unsigned HOST_WIDE_INT mask;
16910 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16911 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16912 not_to_clear_mask |= mask;
16913 }
16914 }
16915 }
16916 else
16917 {
16918 /* Otherwise we can rely on the MODE to determine how many registers
16919 are being used by this argument. */
16920 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16921 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16922 if (arg_regs > 1)
16923 {
16924 unsigned HOST_WIDE_INT
16925 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16926 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16927 not_to_clear_mask |= mask;
16928 }
16929 }
16930 }
16931
16932 return not_to_clear_mask;
16933 }
16934
16935 /* Clear caller-saved registers not used to pass arguments before a
16936 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16937 registers is done in the __gnu_cmse_nonsecure_call libcall.
16938 See libgcc/config/arm/cmse_nonsecure_call.S. */
16939
16940 static void
16941 cmse_nonsecure_call_clear_caller_saved (void)
16942 {
16943 basic_block bb;
16944
16945 FOR_EACH_BB_FN (bb, cfun)
16946 {
16947 rtx_insn *insn;
16948
16949 FOR_BB_INSNS (bb, insn)
16950 {
16951 uint64_t to_clear_mask, float_mask;
16952 rtx_insn *seq;
16953 rtx pat, call, unspec, reg, cleared_reg, tmp;
16954 unsigned int regno, maxregno;
16955 rtx address;
16956 CUMULATIVE_ARGS args_so_far_v;
16957 cumulative_args_t args_so_far;
16958 tree arg_type, fntype;
16959 bool using_r4, first_param = true;
16960 function_args_iterator args_iter;
16961 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16962 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16963
16964 if (!NONDEBUG_INSN_P (insn))
16965 continue;
16966
16967 if (!CALL_P (insn))
16968 continue;
16969
16970 pat = PATTERN (insn);
16971 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16972 call = XVECEXP (pat, 0, 0);
16973
16974 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16975 if (GET_CODE (call) == SET)
16976 call = SET_SRC (call);
16977
16978 /* Check if it is a cmse_nonsecure_call. */
16979 unspec = XEXP (call, 0);
16980 if (GET_CODE (unspec) != UNSPEC
16981 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16982 continue;
16983
16984 /* Determine the caller-saved registers we need to clear. */
16985 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16986 maxregno = NUM_ARG_REGS - 1;
16987 /* Only look at the caller-saved floating point registers in case of
16988 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16989 lazy store and loads which clear both caller- and callee-saved
16990 registers. */
16991 if (TARGET_HARD_FLOAT_ABI)
16992 {
16993 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16994 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16995 to_clear_mask |= float_mask;
16996 maxregno = D7_VFP_REGNUM;
16997 }
16998
16999 /* Make sure the register used to hold the function address is not
17000 cleared. */
17001 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17002 gcc_assert (MEM_P (address));
17003 gcc_assert (REG_P (XEXP (address, 0)));
17004 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17005
17006 /* Set basic block of call insn so that df rescan is performed on
17007 insns inserted here. */
17008 set_block_for_insn (insn, bb);
17009 df_set_flags (DF_DEFER_INSN_RESCAN);
17010 start_sequence ();
17011
17012 /* Make sure the scheduler doesn't schedule other insns beyond
17013 here. */
17014 emit_insn (gen_blockage ());
17015
17016 /* Walk through all arguments and clear registers
17017 appropriately. */
17018 fntype = TREE_TYPE (MEM_EXPR (address));
17019 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17020 NULL_TREE);
17021 args_so_far = pack_cumulative_args (&args_so_far_v);
17022 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17023 {
17024 rtx arg_rtx;
17025 machine_mode arg_mode = TYPE_MODE (arg_type);
17026
17027 if (VOID_TYPE_P (arg_type))
17028 continue;
17029
17030 if (!first_param)
17031 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17032 true);
17033
17034 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17035 true);
17036 gcc_assert (REG_P (arg_rtx));
17037 to_clear_mask
17038 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17039 REGNO (arg_rtx),
17040 padding_bits_to_clear_ptr);
17041
17042 first_param = false;
17043 }
17044
17045 /* Clear padding bits where needed. */
17046 cleared_reg = XEXP (address, 0);
17047 reg = gen_rtx_REG (SImode, IP_REGNUM);
17048 using_r4 = false;
17049 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17050 {
17051 if (padding_bits_to_clear[regno] == 0)
17052 continue;
17053
17054 /* If this is a Thumb-1 target, copy the address of the function
17055 we are calling from 'r4' into 'ip' such that we can use r4 to
17056 clear the unused bits in the arguments. */
17057 if (TARGET_THUMB1 && !using_r4)
17058 {
17059 using_r4 = true;
17060 reg = cleared_reg;
17061 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17062 reg);
17063 }
17064
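/* Load the low 16 bits of the inverted padding mask into REG; the
   high 16 bits are filled in below only if they are non-zero.  */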
17065 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17066 emit_move_insn (reg, tmp);
17067 /* Also fill the top half of the negated
17068 padding_bits_to_clear. */
17069 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17070 {
17071 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17072 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17073 GEN_INT (16),
17074 GEN_INT (16)),
17075 tmp));
17076 }
17077
17078 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17079 gen_rtx_REG (SImode, regno),
17080 reg));
17081
17082 }
17083 if (using_r4)
17084 emit_move_insn (cleared_reg,
17085 gen_rtx_REG (SImode, IP_REGNUM));
17086
17087 /* We use right shift and left shift to clear the LSB of the address
17088 we jump to instead of using bic, to avoid having to use an extra
17089 register on Thumb-1. */
17090 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17091 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17092 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17093 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17094
17095 /* Clear all registers that could leak before doing a non-secure
17096 call. */
17097 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17098 {
17099 if (!(to_clear_mask & (1LL << regno)))
17100 continue;
17101
17102 /* If regno is an even vfp register and its successor is also to
17103 be cleared, use vmov. */
17104 if (IS_VFP_REGNUM (regno))
17105 {
17106 if (TARGET_VFP_DOUBLE
17107 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17108 && to_clear_mask & (1LL << (regno + 1)))
17109 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17110 CONST0_RTX (DFmode));
17111 else
17112 emit_move_insn (gen_rtx_REG (SFmode, regno),
17113 CONST0_RTX (SFmode));
17114 }
17115 else
17116 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17117 }
17118
17119 seq = get_insns ();
17120 end_sequence ();
17121 emit_insn_before (seq, insn);
17122
17123 }
17124 }
17125 }
17126
17127 /* Rewrite move insn into subtract of 0 if the condition codes will
17128 be useful in the next conditional jump insn. */
17129
17130 static void
17131 thumb1_reorg (void)
17132 {
17133 basic_block bb;
17134
17135 FOR_EACH_BB_FN (bb, cfun)
17136 {
17137 rtx dest, src;
17138 rtx cmp, op0, op1, set = NULL;
17139 rtx_insn *prev, *insn = BB_END (bb);
17140 bool insn_clobbered = false;
17141
17142 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17143 insn = PREV_INSN (insn);
17144
17145 /* Find the last cbranchsi4_insn in basic block BB. */
17146 if (insn == BB_HEAD (bb)
17147 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17148 continue;
17149
17150 /* Get the register with which we are comparing. */
17151 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17152 op0 = XEXP (cmp, 0);
17153 op1 = XEXP (cmp, 1);
17154
17155 /* Check that comparison is against ZERO. */
17156 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17157 continue;
17158
17159 /* Find the first flag setting insn before INSN in basic block BB. */
17160 gcc_assert (insn != BB_HEAD (bb));
17161 for (prev = PREV_INSN (insn);
17162 (!insn_clobbered
17163 && prev != BB_HEAD (bb)
17164 && (NOTE_P (prev)
17165 || DEBUG_INSN_P (prev)
17166 || ((set = single_set (prev)) != NULL
17167 && get_attr_conds (prev) == CONDS_NOCOND)));
17168 prev = PREV_INSN (prev))
17169 {
17170 if (reg_set_p (op0, prev))
17171 insn_clobbered = true;
17172 }
17173
17174 /* Skip if op0 is clobbered by an insn other than prev. */
17175 if (insn_clobbered)
17176 continue;
17177
17178 if (!set)
17179 continue;
17180
17181 dest = SET_DEST (set);
17182 src = SET_SRC (set);
17183 if (!low_register_operand (dest, SImode)
17184 || !low_register_operand (src, SImode))
17185 continue;
17186
17187 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17188 in INSN. Both src and dest of the move insn are checked. */
17189 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17190 {
17191 dest = copy_rtx (dest);
17192 src = copy_rtx (src);
17193 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17194 PATTERN (prev) = gen_rtx_SET (dest, src);
17195 INSN_CODE (prev) = -1;
17196 /* Set test register in INSN to dest. */
17197 XEXP (cmp, 0) = copy_rtx (dest);
17198 INSN_CODE (insn) = -1;
17199 }
17200 }
17201 }
17202
17203 /* Convert instructions to their cc-clobbering variant if possible, since
17204 that allows us to use smaller encodings. */
17205
17206 static void
17207 thumb2_reorg (void)
17208 {
17209 basic_block bb;
17210 regset_head live;
17211
17212 INIT_REG_SET (&live);
17213
17214 /* We are freeing block_for_insn in the toplev to keep compatibility
17215 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17216 compute_bb_for_insn ();
17217 df_analyze ();
17218
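/* SKIP: leave the insn alone.  CONV: add a clobber of CC so the
   flag-setting 16-bit encoding can be used.  SWAP_CONV: likewise, but
   swap the commutative source operands first.  */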
17219 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17220
17221 FOR_EACH_BB_FN (bb, cfun)
17222 {
17223 if ((current_tune->disparage_flag_setting_t16_encodings
17224 == tune_params::DISPARAGE_FLAGS_ALL)
17225 && optimize_bb_for_speed_p (bb))
17226 continue;
17227
17228 rtx_insn *insn;
17229 Convert_Action action = SKIP;
17230 Convert_Action action_for_partial_flag_setting
17231 = ((current_tune->disparage_flag_setting_t16_encodings
17232 != tune_params::DISPARAGE_FLAGS_NEITHER)
17233 && optimize_bb_for_speed_p (bb))
17234 ? SKIP : CONV;
17235
17236 COPY_REG_SET (&live, DF_LR_OUT (bb));
17237 df_simulate_initialize_backwards (bb, &live);
17238 FOR_BB_INSNS_REVERSE (bb, insn)
17239 {
17240 if (NONJUMP_INSN_P (insn)
17241 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17242 && GET_CODE (PATTERN (insn)) == SET)
17243 {
17244 action = SKIP;
17245 rtx pat = PATTERN (insn);
17246 rtx dst = XEXP (pat, 0);
17247 rtx src = XEXP (pat, 1);
17248 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17249
17250 if (UNARY_P (src) || BINARY_P (src))
17251 op0 = XEXP (src, 0);
17252
17253 if (BINARY_P (src))
17254 op1 = XEXP (src, 1);
17255
17256 if (low_register_operand (dst, SImode))
17257 {
17258 switch (GET_CODE (src))
17259 {
17260 case PLUS:
17261 /* Adding two registers and storing the result
17262 in the first source is already a 16-bit
17263 operation. */
17264 if (rtx_equal_p (dst, op0)
17265 && register_operand (op1, SImode))
17266 break;
17267
17268 if (low_register_operand (op0, SImode))
17269 {
17270 /* ADDS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op1, SImode))
17272 action = CONV;
17273 /* ADDS <Rdn>,#<imm8> */
17274 /* SUBS <Rdn>,#<imm8> */
17275 else if (rtx_equal_p (dst, op0)
17276 && CONST_INT_P (op1)
17277 && IN_RANGE (INTVAL (op1), -255, 255))
17278 action = CONV;
17279 /* ADDS <Rd>,<Rn>,#<imm3> */
17280 /* SUBS <Rd>,<Rn>,#<imm3> */
17281 else if (CONST_INT_P (op1)
17282 && IN_RANGE (INTVAL (op1), -7, 7))
17283 action = CONV;
17284 }
17285 /* ADCS <Rd>, <Rn> */
17286 else if (GET_CODE (XEXP (src, 0)) == PLUS
17287 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17288 && low_register_operand (XEXP (XEXP (src, 0), 1),
17289 SImode)
17290 && COMPARISON_P (op1)
17291 && cc_register (XEXP (op1, 0), VOIDmode)
17292 && maybe_get_arm_condition_code (op1) == ARM_CS
17293 && XEXP (op1, 1) == const0_rtx)
17294 action = CONV;
17295 break;
17296
17297 case MINUS:
17298 /* RSBS <Rd>,<Rn>,#0
17299 Not handled here: see NEG below. */
17300 /* SUBS <Rd>,<Rn>,#<imm3>
17301 SUBS <Rdn>,#<imm8>
17302 Not handled here: see PLUS above. */
17303 /* SUBS <Rd>,<Rn>,<Rm> */
17304 if (low_register_operand (op0, SImode)
17305 && low_register_operand (op1, SImode))
17306 action = CONV;
17307 break;
17308
17309 case MULT:
17310 /* MULS <Rdm>,<Rn>,<Rdm>
17311 As an exception to the rule, this is only used
17312 when optimizing for size since MULS is slow on all
17313 known implementations. We do not even want to use
17314 MULS in cold code, if optimizing for speed, so we
17315 test the global flag here. */
17316 if (!optimize_size)
17317 break;
17318 /* Fall through. */
17319 case AND:
17320 case IOR:
17321 case XOR:
17322 /* ANDS <Rdn>,<Rm> */
17323 if (rtx_equal_p (dst, op0)
17324 && low_register_operand (op1, SImode))
17325 action = action_for_partial_flag_setting;
17326 else if (rtx_equal_p (dst, op1)
17327 && low_register_operand (op0, SImode))
17328 action = action_for_partial_flag_setting == SKIP
17329 ? SKIP : SWAP_CONV;
17330 break;
17331
17332 case ASHIFTRT:
17333 case ASHIFT:
17334 case LSHIFTRT:
17335 /* ASRS <Rdn>,<Rm> */
17336 /* LSRS <Rdn>,<Rm> */
17337 /* LSLS <Rdn>,<Rm> */
17338 if (rtx_equal_p (dst, op0)
17339 && low_register_operand (op1, SImode))
17340 action = action_for_partial_flag_setting;
17341 /* ASRS <Rd>,<Rm>,#<imm5> */
17342 /* LSRS <Rd>,<Rm>,#<imm5> */
17343 /* LSLS <Rd>,<Rm>,#<imm5> */
17344 else if (low_register_operand (op0, SImode)
17345 && CONST_INT_P (op1)
17346 && IN_RANGE (INTVAL (op1), 0, 31))
17347 action = action_for_partial_flag_setting;
17348 break;
17349
17350 case ROTATERT:
17351 /* RORS <Rdn>,<Rm> */
17352 if (rtx_equal_p (dst, op0)
17353 && low_register_operand (op1, SImode))
17354 action = action_for_partial_flag_setting;
17355 break;
17356
17357 case NOT:
17358 /* MVNS <Rd>,<Rm> */
17359 if (low_register_operand (op0, SImode))
17360 action = action_for_partial_flag_setting;
17361 break;
17362
17363 case NEG:
17364 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17365 if (low_register_operand (op0, SImode))
17366 action = CONV;
17367 break;
17368
17369 case CONST_INT:
17370 /* MOVS <Rd>,#<imm8> */
17371 if (CONST_INT_P (src)
17372 && IN_RANGE (INTVAL (src), 0, 255))
17373 action = action_for_partial_flag_setting;
17374 break;
17375
17376 case REG:
17377 /* MOVS and MOV<c> with registers have different
17378 encodings, so are not relevant here. */
17379 break;
17380
17381 default:
17382 break;
17383 }
17384 }
17385
17386 if (action != SKIP)
17387 {
17388 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17389 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17390 rtvec vec;
17391
17392 if (action == SWAP_CONV)
17393 {
17394 src = copy_rtx (src);
17395 XEXP (src, 0) = op1;
17396 XEXP (src, 1) = op0;
17397 pat = gen_rtx_SET (dst, src);
17398 vec = gen_rtvec (2, pat, clobber);
17399 }
17400 else /* action == CONV */
17401 vec = gen_rtvec (2, pat, clobber);
17402
17403 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17404 INSN_CODE (insn) = -1;
17405 }
17406 }
17407
17408 if (NONDEBUG_INSN_P (insn))
17409 df_simulate_one_insn_backwards (bb, insn, &live);
17410 }
17411 }
17412
17413 CLEAR_REG_SET (&live);
17414 }
17415
17416 /* GCC puts the pool in the wrong place for ARM, since we can only
17417 load addresses a limited distance around the pc. We do some
17418 special munging to move the constant pool values to the correct
17419 point in the code. */
17420 static void
17421 arm_reorg (void)
17422 {
17423 rtx_insn *insn;
17424 HOST_WIDE_INT address = 0;
17425 Mfix * fix;
17426
17427 if (use_cmse)
17428 cmse_nonsecure_call_clear_caller_saved ();
17429 if (TARGET_THUMB1)
17430 thumb1_reorg ();
17431 else if (TARGET_THUMB2)
17432 thumb2_reorg ();
17433
17434 /* Ensure all insns that must be split have been split at this point.
17435 Otherwise, the pool placement code below may compute incorrect
17436 insn lengths. Note that when optimizing, all insns have already
17437 been split at this point. */
17438 if (!optimize)
17439 split_all_insns_noflow ();
17440
17441 /* If literal pools have been disabled, make sure we do not attempt to
17442 create one; it should no longer be necessary to create any. */
17443 if (arm_disable_literal_pool)
17444 return;
17445
17446 minipool_fix_head = minipool_fix_tail = NULL;
17447
17448 /* The first insn must always be a note, or the code below won't
17449 scan it properly. */
17450 insn = get_insns ();
17451 gcc_assert (NOTE_P (insn));
17452 minipool_pad = 0;
17453
17454 /* Scan all the insns and record the operands that will need fixing. */
17455 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17456 {
17457 if (BARRIER_P (insn))
17458 push_minipool_barrier (insn, address);
17459 else if (INSN_P (insn))
17460 {
17461 rtx_jump_table_data *table;
17462
17463 note_invalid_constants (insn, address, true);
17464 address += get_attr_length (insn);
17465
17466 /* If the insn is a vector jump, add the size of the table
17467 and skip the table. */
17468 if (tablejump_p (insn, NULL, &table))
17469 {
17470 address += get_jump_table_size (table);
17471 insn = table;
17472 }
17473 }
17474 else if (LABEL_P (insn))
17475 /* Add the worst-case padding due to alignment. We don't add
17476 the _current_ padding because the minipool insertions
17477 themselves might change it. */
17478 address += get_label_padding (insn);
17479 }
17480
17481 fix = minipool_fix_head;
17482
17483 /* Now scan the fixups and perform the required changes. */
17484 while (fix)
17485 {
17486 Mfix * ftmp;
17487 Mfix * fdel;
17488 Mfix * last_added_fix;
17489 Mfix * last_barrier = NULL;
17490 Mfix * this_fix;
17491
17492 /* Skip any further barriers before the next fix. */
17493 while (fix && BARRIER_P (fix->insn))
17494 fix = fix->next;
17495
17496 /* No more fixes. */
17497 if (fix == NULL)
17498 break;
17499
17500 last_added_fix = NULL;
17501
17502 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17503 {
17504 if (BARRIER_P (ftmp->insn))
17505 {
17506 if (ftmp->address >= minipool_vector_head->max_address)
17507 break;
17508
17509 last_barrier = ftmp;
17510 }
17511 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17512 break;
17513
17514 last_added_fix = ftmp; /* Keep track of the last fix added. */
17515 }
17516
17517 /* If we found a barrier, drop back to that; any fixes that we
17518 could have reached but come after the barrier will now go in
17519 the next mini-pool. */
17520 if (last_barrier != NULL)
17521 {
17522 /* Reduce the refcount for those fixes that won't go into this
17523 pool after all. */
17524 for (fdel = last_barrier->next;
17525 fdel && fdel != ftmp;
17526 fdel = fdel->next)
17527 {
17528 fdel->minipool->refcount--;
17529 fdel->minipool = NULL;
17530 }
17531
17532 ftmp = last_barrier;
17533 }
17534 else
17535 {
17536 /* ftmp is the first fix that we can't fit into this pool and
17537 there are no natural barriers that we could use. Insert a
17538 new barrier in the code somewhere between the previous
17539 fix and this one, and arrange to jump around it. */
17540 HOST_WIDE_INT max_address;
17541
17542 /* The last item on the list of fixes must be a barrier, so
17543 we can never run off the end of the list of fixes without
17544 last_barrier being set. */
17545 gcc_assert (ftmp);
17546
17547 max_address = minipool_vector_head->max_address;
17548 /* Check that there isn't another fix that is in range that
17549 we couldn't fit into this pool because the pool was
17550 already too large: we need to put the pool before such an
17551 instruction. The pool itself may come just after the
17552 fix because create_fix_barrier also allows space for a
17553 jump instruction. */
17554 if (ftmp->address < max_address)
17555 max_address = ftmp->address + 1;
17556
17557 last_barrier = create_fix_barrier (last_added_fix, max_address);
17558 }
17559
17560 assign_minipool_offsets (last_barrier);
17561
17562 while (ftmp)
17563 {
17564 if (!BARRIER_P (ftmp->insn)
17565 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17566 == NULL))
17567 break;
17568
17569 ftmp = ftmp->next;
17570 }
17571
17572 /* Scan over the fixes we have identified for this pool, fixing them
17573 up and adding the constants to the pool itself. */
17574 for (this_fix = fix; this_fix && ftmp != this_fix;
17575 this_fix = this_fix->next)
17576 if (!BARRIER_P (this_fix->insn))
17577 {
17578 rtx addr
17579 = plus_constant (Pmode,
17580 gen_rtx_LABEL_REF (VOIDmode,
17581 minipool_vector_label),
17582 this_fix->minipool->offset);
17583 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17584 }
17585
17586 dump_minipool (last_barrier->insn);
17587 fix = ftmp;
17588 }
17589
17590 /* From now on we must synthesize any constants that we can't handle
17591 directly. This can happen if the RTL gets split during final
17592 instruction generation. */
17593 cfun->machine->after_arm_reorg = 1;
17594
17595 /* Free the minipool memory. */
17596 obstack_free (&minipool_obstack, minipool_startobj);
17597 }
17598 \f
17599 /* Routines to output assembly language. */
17600
17601 /* Return string representation of passed in real value. */
17602 static const char *
17603 fp_const_from_val (REAL_VALUE_TYPE *r)
17604 {
17605 if (!fp_consts_inited)
17606 init_fp_table ();
17607
17608 gcc_assert (real_equal (r, &value_fp0));
17609 return "0";
17610 }
17611
17612 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17613 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17614 insn is in the list, and UPDATE is true iff the list contains an
17615 explicit update of the base register. */
17616 void
17617 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17618 bool update)
17619 {
17620 int i;
17621 char pattern[100];
17622 int offset;
17623 const char *conditional;
17624 int num_saves = XVECLEN (operands[0], 0);
17625 unsigned int regno;
17626 unsigned int regno_base = REGNO (operands[1]);
17627 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17628
17629 offset = 0;
17630 offset += update ? 1 : 0;
17631 offset += return_pc ? 1 : 0;
17632
17633 /* Is the base register in the list? */
17634 for (i = offset; i < num_saves; i++)
17635 {
17636 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17637 /* If SP is in the list, then the base register must be SP. */
17638 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17639 /* If base register is in the list, there must be no explicit update. */
17640 if (regno == regno_base)
17641 gcc_assert (!update);
17642 }
17643
17644 conditional = reverse ? "%?%D0" : "%?%d0";
17645 /* Can't use POP if returning from an interrupt. */
17646 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17647 sprintf (pattern, "pop%s\t{", conditional);
17648 else
17649 {
17650 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17651 It's just a convention, their semantics are identical. */
17652 if (regno_base == SP_REGNUM)
17653 sprintf (pattern, "ldmfd%s\t", conditional);
17654 else if (update)
17655 sprintf (pattern, "ldmia%s\t", conditional);
17656 else
17657 sprintf (pattern, "ldm%s\t", conditional);
17658
17659 strcat (pattern, reg_names[regno_base]);
17660 if (update)
17661 strcat (pattern, "!, {");
17662 else
17663 strcat (pattern, ", {");
17664 }
17665
17666 /* Output the first destination register. */
17667 strcat (pattern,
17668 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17669
17670 /* Output the rest of the destination registers. */
17671 for (i = offset + 1; i < num_saves; i++)
17672 {
17673 strcat (pattern, ", ");
17674 strcat (pattern,
17675 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17676 }
17677
17678 strcat (pattern, "}");
17679
17680 if (interrupt_p && return_pc)
17681 strcat (pattern, "^");
17682
17683 output_asm_insn (pattern, &cond);
17684 }
17685
17686
17687 /* Output the assembly for a store multiple. */
17688
17689 const char *
17690 vfp_output_vstmd (rtx * operands)
17691 {
17692 char pattern[100];
17693 int p;
17694 int base;
17695 int i;
17696 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17697 ? XEXP (operands[0], 0)
17698 : XEXP (XEXP (operands[0], 0), 0);
17699 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17700
17701 if (push_p)
17702 strcpy (pattern, "vpush%?.64\t{%P1");
17703 else
17704 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17705
17706 p = strlen (pattern);
17707
17708 gcc_assert (REG_P (operands[1]));
17709
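/* Convert the internal register number of the first source register
   into a D-register index; the remaining registers in the block are
   consecutive D registers starting from there.  */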
17710 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17711 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17712 {
17713 p += sprintf (&pattern[p], ", d%d", base + i);
17714 }
17715 strcpy (&pattern[p], "}");
17716
17717 output_asm_insn (pattern, operands);
17718 return "";
17719 }
17720
17721
17722 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17723 number of bytes pushed. */
17724
17725 static int
17726 vfp_emit_fstmd (int base_reg, int count)
17727 {
17728 rtx par;
17729 rtx dwarf;
17730 rtx tmp, reg;
17731 int i;
17732
17733 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17734 register pairs are stored by a store multiple insn. We avoid this
17735 by pushing an extra pair. */
17736 if (count == 2 && !arm_arch6)
17737 {
17738 if (base_reg == LAST_VFP_REGNUM - 3)
17739 base_reg -= 2;
17740 count++;
17741 }
17742
17743 /* FSTMD may not store more than 16 doubleword registers at once. Split
17744 larger stores into multiple parts (up to a maximum of two, in
17745 practice). */
17746 if (count > 16)
17747 {
17748 int saved;
17749 /* NOTE: base_reg is an internal register number, so each D register
17750 counts as 2. */
17751 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17752 saved += vfp_emit_fstmd (base_reg, 16);
17753 return saved;
17754 }
17755
17756 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17757 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17758
17759 reg = gen_rtx_REG (DFmode, base_reg);
17760 base_reg += 2;
17761
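/* The first element of the PARALLEL describes the whole push as a
   store-multiple to memory addressed by a PRE_MODIFY that drops the
   stack pointer by COUNT * 8 bytes.  */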
17762 XVECEXP (par, 0, 0)
17763 = gen_rtx_SET (gen_frame_mem
17764 (BLKmode,
17765 gen_rtx_PRE_MODIFY (Pmode,
17766 stack_pointer_rtx,
17767 plus_constant
17768 (Pmode, stack_pointer_rtx,
17769 - (count * 8)))
17770 ),
17771 gen_rtx_UNSPEC (BLKmode,
17772 gen_rtvec (1, reg),
17773 UNSPEC_PUSH_MULT));
17774
17775 tmp = gen_rtx_SET (stack_pointer_rtx,
17776 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17777 RTX_FRAME_RELATED_P (tmp) = 1;
17778 XVECEXP (dwarf, 0, 0) = tmp;
17779
17780 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17781 RTX_FRAME_RELATED_P (tmp) = 1;
17782 XVECEXP (dwarf, 0, 1) = tmp;
17783
17784 for (i = 1; i < count; i++)
17785 {
17786 reg = gen_rtx_REG (DFmode, base_reg);
17787 base_reg += 2;
17788 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17789
17790 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17791 plus_constant (Pmode,
17792 stack_pointer_rtx,
17793 i * 8)),
17794 reg);
17795 RTX_FRAME_RELATED_P (tmp) = 1;
17796 XVECEXP (dwarf, 0, i + 1) = tmp;
17797 }
17798
17799 par = emit_insn (par);
17800 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17801 RTX_FRAME_RELATED_P (par) = 1;
17802
17803 return count * 8;
17804 }
17805
17806 /* Return TRUE if -mcmse has been passed and the function pointed to by
17807 'addr' has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17808
17809 bool
17810 detect_cmse_nonsecure_call (tree addr)
17811 {
17812 if (!addr)
17813 return FALSE;
17814
17815 tree fntype = TREE_TYPE (addr);
17816 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17817 TYPE_ATTRIBUTES (fntype)))
17818 return TRUE;
17819 return FALSE;
17820 }
17821
17822
17823 /* Emit a call instruction with pattern PAT. ADDR is the address of
17824 the call target. */
17825
17826 void
17827 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17828 {
17829 rtx insn;
17830
17831 insn = emit_call_insn (pat);
17832
17833 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17834 If the call might use such an entry, add a use of the PIC register
17835 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17836 if (TARGET_VXWORKS_RTP
17837 && flag_pic
17838 && !sibcall
17839 && GET_CODE (addr) == SYMBOL_REF
17840 && (SYMBOL_REF_DECL (addr)
17841 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17842 : !SYMBOL_REF_LOCAL_P (addr)))
17843 {
17844 require_pic_register ();
17845 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17846 }
17847
17848 if (TARGET_AAPCS_BASED)
17849 {
17850 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17851 linker. We need to add an IP clobber to allow setting
17852 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17853 is not needed since it's a fixed register. */
17854 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17855 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17856 }
17857 }
17858
17859 /* Output a 'call' insn. */
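/* This is used only when BLX is unavailable (see the assert below): the
   return address is set up with MOV LR, PC and the target is reached with
   BX or MOV PC; calls through LR are first copied into IP.  */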
17860 const char *
17861 output_call (rtx *operands)
17862 {
17863 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17864
17865 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17866 if (REGNO (operands[0]) == LR_REGNUM)
17867 {
17868 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17869 output_asm_insn ("mov%?\t%0, %|lr", operands);
17870 }
17871
17872 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17873
17874 if (TARGET_INTERWORK || arm_arch4t)
17875 output_asm_insn ("bx%?\t%0", operands);
17876 else
17877 output_asm_insn ("mov%?\t%|pc, %0", operands);
17878
17879 return "";
17880 }
17881
17882 /* Output a move of a long double from ARM registers to ARM registers.
17883 OPERANDS[0] is the destination.
17884 OPERANDS[1] is the source. */
17885 const char *
17886 output_mov_long_double_arm_from_arm (rtx *operands)
17887 {
17888 /* We have to be careful here because the two might overlap. */
17889 int dest_start = REGNO (operands[0]);
17890 int src_start = REGNO (operands[1]);
17891 rtx ops[2];
17892 int i;
17893
17894 if (dest_start < src_start)
17895 {
17896 for (i = 0; i < 3; i++)
17897 {
17898 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17899 ops[1] = gen_rtx_REG (SImode, src_start + i);
17900 output_asm_insn ("mov%?\t%0, %1", ops);
17901 }
17902 }
17903 else
17904 {
17905 for (i = 2; i >= 0; i--)
17906 {
17907 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17908 ops[1] = gen_rtx_REG (SImode, src_start + i);
17909 output_asm_insn ("mov%?\t%0, %1", ops);
17910 }
17911 }
17912
17913 return "";
17914 }
17915
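/* Load SRC into DEST.  For a constant, emit a set of the low 16 bits
   followed, when needed, by a zero_extract set of the high 16 bits (a
   movw/movt pair); otherwise emit a HIGH/LO_SUM pair.  A REG_EQUAL note
   records the full value on the final insn.  */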
17916 void
17917 arm_emit_movpair (rtx dest, rtx src)
17918 {
17919 /* If the src is an immediate, simplify it. */
17920 if (CONST_INT_P (src))
17921 {
17922 HOST_WIDE_INT val = INTVAL (src);
17923 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17924 if ((val >> 16) & 0x0000ffff)
17925 {
17926 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17927 GEN_INT (16)),
17928 GEN_INT ((val >> 16) & 0x0000ffff));
17929 rtx_insn *insn = get_last_insn ();
17930 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17931 }
17932 return;
17933 }
17934 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17935 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17936 rtx_insn *insn = get_last_insn ();
17937 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17938 }
17939
17940 /* Output a move between double words. It must be REG<-MEM
17941 or MEM<-REG. */
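/* If EMIT is false, nothing is output and only the instruction count is
   computed; when COUNT is non-null, the number of instructions (1 or 2)
   is stored through it.  */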
17942 const char *
17943 output_move_double (rtx *operands, bool emit, int *count)
17944 {
17945 enum rtx_code code0 = GET_CODE (operands[0]);
17946 enum rtx_code code1 = GET_CODE (operands[1]);
17947 rtx otherops[3];
17948 if (count)
17949 *count = 1;
17950
17951 /* The only case when this might happen is when
17952 you are looking at the length of a DImode instruction
17953 that has an invalid constant in it. */
17954 if (code0 == REG && code1 != MEM)
17955 {
17956 gcc_assert (!emit);
17957 *count = 2;
17958 return "";
17959 }
17960
17961 if (code0 == REG)
17962 {
17963 unsigned int reg0 = REGNO (operands[0]);
17964
17965 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17966
17967 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17968
17969 switch (GET_CODE (XEXP (operands[1], 0)))
17970 {
17971 case REG:
17972
17973 if (emit)
17974 {
17975 if (TARGET_LDRD
17976 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17977 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17978 else
17979 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17980 }
17981 break;
17982
17983 case PRE_INC:
17984 gcc_assert (TARGET_LDRD);
17985 if (emit)
17986 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17987 break;
17988
17989 case PRE_DEC:
17990 if (emit)
17991 {
17992 if (TARGET_LDRD)
17993 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17994 else
17995 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17996 }
17997 break;
17998
17999 case POST_INC:
18000 if (emit)
18001 {
18002 if (TARGET_LDRD)
18003 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18004 else
18005 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18006 }
18007 break;
18008
18009 case POST_DEC:
18010 gcc_assert (TARGET_LDRD);
18011 if (emit)
18012 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18013 break;
18014
18015 case PRE_MODIFY:
18016 case POST_MODIFY:
18017 /* Auto-increment addressing modes should never have overlapping
18018 base and destination registers, and overlapping index registers
18019 are already prohibited, so this doesn't need to worry about
18020 fix_cm3_ldrd. */
18021 otherops[0] = operands[0];
18022 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18023 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18024
18025 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18026 {
18027 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18028 {
18029 /* Registers overlap so split out the increment. */
18030 if (emit)
18031 {
18032 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18033 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18034 }
18035 if (count)
18036 *count = 2;
18037 }
18038 else
18039 {
18040 /* Use a single insn if we can.
18041 FIXME: IWMMXT allows offsets larger than ldrd can
18042 handle, fix these up with a pair of ldr. */
18043 if (TARGET_THUMB2
18044 || !CONST_INT_P (otherops[2])
18045 || (INTVAL (otherops[2]) > -256
18046 && INTVAL (otherops[2]) < 256))
18047 {
18048 if (emit)
18049 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18050 }
18051 else
18052 {
18053 if (emit)
18054 {
18055 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18056 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18057 }
18058 if (count)
18059 *count = 2;
18060
18061 }
18062 }
18063 }
18064 else
18065 {
18066 /* Use a single insn if we can.
18067 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18068 fix these up with a pair of ldr. */
18069 if (TARGET_THUMB2
18070 || !CONST_INT_P (otherops[2])
18071 || (INTVAL (otherops[2]) > -256
18072 && INTVAL (otherops[2]) < 256))
18073 {
18074 if (emit)
18075 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18076 }
18077 else
18078 {
18079 if (emit)
18080 {
18081 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18082 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18083 }
18084 if (count)
18085 *count = 2;
18086 }
18087 }
18088 break;
18089
18090 case LABEL_REF:
18091 case CONST:
18092 /* We might be able to use ldrd %0, %1 here.  However, the range is
18093 different from that of ldr/adr, and it is broken on some ARMv7-M
18094 implementations. */
18095 /* Use the second register of the pair to avoid problematic
18096 overlap. */
18097 otherops[1] = operands[1];
18098 if (emit)
18099 output_asm_insn ("adr%?\t%0, %1", otherops);
18100 operands[1] = otherops[0];
18101 if (emit)
18102 {
18103 if (TARGET_LDRD)
18104 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18105 else
18106 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18107 }
18108
18109 if (count)
18110 *count = 2;
18111 break;
18112
18113 /* ??? This needs checking for thumb2. */
18114 default:
18115 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18116 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18117 {
18118 otherops[0] = operands[0];
18119 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18120 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18121
18122 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18123 {
18124 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18125 {
18126 switch ((int) INTVAL (otherops[2]))
18127 {
18128 case -8:
18129 if (emit)
18130 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18131 return "";
18132 case -4:
18133 if (TARGET_THUMB2)
18134 break;
18135 if (emit)
18136 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18137 return "";
18138 case 4:
18139 if (TARGET_THUMB2)
18140 break;
18141 if (emit)
18142 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18143 return "";
18144 }
18145 }
18146 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18147 operands[1] = otherops[0];
18148 if (TARGET_LDRD
18149 && (REG_P (otherops[2])
18150 || TARGET_THUMB2
18151 || (CONST_INT_P (otherops[2])
18152 && INTVAL (otherops[2]) > -256
18153 && INTVAL (otherops[2]) < 256)))
18154 {
18155 if (reg_overlap_mentioned_p (operands[0],
18156 otherops[2]))
18157 {
18158 /* Swap base and index registers over to
18159 avoid a conflict. */
18160 std::swap (otherops[1], otherops[2]);
18161 }
18162 /* If both registers conflict, it will usually
18163 have been fixed by a splitter. */
18164 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18165 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18166 {
18167 if (emit)
18168 {
18169 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18170 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18171 }
18172 if (count)
18173 *count = 2;
18174 }
18175 else
18176 {
18177 otherops[0] = operands[0];
18178 if (emit)
18179 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18180 }
18181 return "";
18182 }
18183
18184 if (CONST_INT_P (otherops[2]))
18185 {
18186 if (emit)
18187 {
18188 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18189 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18190 else
18191 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18192 }
18193 }
18194 else
18195 {
18196 if (emit)
18197 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18198 }
18199 }
18200 else
18201 {
18202 if (emit)
18203 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18204 }
18205
18206 if (count)
18207 *count = 2;
18208
18209 if (TARGET_LDRD)
18210 return "ldrd%?\t%0, [%1]";
18211
18212 return "ldmia%?\t%1, %M0";
18213 }
18214 else
18215 {
18216 otherops[1] = adjust_address (operands[1], SImode, 4);
18217 /* Take care of overlapping base/data reg. */
18218 if (reg_mentioned_p (operands[0], operands[1]))
18219 {
18220 if (emit)
18221 {
18222 output_asm_insn ("ldr%?\t%0, %1", otherops);
18223 output_asm_insn ("ldr%?\t%0, %1", operands);
18224 }
18225 if (count)
18226 *count = 2;
18227
18228 }
18229 else
18230 {
18231 if (emit)
18232 {
18233 output_asm_insn ("ldr%?\t%0, %1", operands);
18234 output_asm_insn ("ldr%?\t%0, %1", otherops);
18235 }
18236 if (count)
18237 *count = 2;
18238 }
18239 }
18240 }
18241 }
18242 else
18243 {
18244 /* Constraints should ensure this. */
18245 gcc_assert (code0 == MEM && code1 == REG);
18246 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18247 || (TARGET_ARM && TARGET_LDRD));
18248
18249 switch (GET_CODE (XEXP (operands[0], 0)))
18250 {
18251 case REG:
18252 if (emit)
18253 {
18254 if (TARGET_LDRD)
18255 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18256 else
18257 output_asm_insn ("stm%?\t%m0, %M1", operands);
18258 }
18259 break;
18260
18261 case PRE_INC:
18262 gcc_assert (TARGET_LDRD);
18263 if (emit)
18264 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18265 break;
18266
18267 case PRE_DEC:
18268 if (emit)
18269 {
18270 if (TARGET_LDRD)
18271 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18272 else
18273 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18274 }
18275 break;
18276
18277 case POST_INC:
18278 if (emit)
18279 {
18280 if (TARGET_LDRD)
18281 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18282 else
18283 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18284 }
18285 break;
18286
18287 case POST_DEC:
18288 gcc_assert (TARGET_LDRD);
18289 if (emit)
18290 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18291 break;
18292
18293 case PRE_MODIFY:
18294 case POST_MODIFY:
18295 otherops[0] = operands[1];
18296 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18297 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18298
18299 /* IWMMXT allows offsets larger than strd can handle,
18300 fix these up with a pair of str.  */
18301 if (!TARGET_THUMB2
18302 && CONST_INT_P (otherops[2])
18303 && (INTVAL(otherops[2]) <= -256
18304 || INTVAL(otherops[2]) >= 256))
18305 {
18306 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18307 {
18308 if (emit)
18309 {
18310 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18311 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18312 }
18313 if (count)
18314 *count = 2;
18315 }
18316 else
18317 {
18318 if (emit)
18319 {
18320 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18321 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18322 }
18323 if (count)
18324 *count = 2;
18325 }
18326 }
18327 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18328 {
18329 if (emit)
18330 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18331 }
18332 else
18333 {
18334 if (emit)
18335 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18336 }
18337 break;
18338
18339 case PLUS:
18340 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18341 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18342 {
18343 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18344 {
18345 case -8:
18346 if (emit)
18347 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18348 return "";
18349
18350 case -4:
18351 if (TARGET_THUMB2)
18352 break;
18353 if (emit)
18354 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18355 return "";
18356
18357 case 4:
18358 if (TARGET_THUMB2)
18359 break;
18360 if (emit)
18361 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18362 return "";
18363 }
18364 }
18365 if (TARGET_LDRD
18366 && (REG_P (otherops[2])
18367 || TARGET_THUMB2
18368 || (CONST_INT_P (otherops[2])
18369 && INTVAL (otherops[2]) > -256
18370 && INTVAL (otherops[2]) < 256)))
18371 {
18372 otherops[0] = operands[1];
18373 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18374 if (emit)
18375 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18376 return "";
18377 }
18378 /* Fall through */
18379
18380 default:
18381 otherops[0] = adjust_address (operands[0], SImode, 4);
18382 otherops[1] = operands[1];
18383 if (emit)
18384 {
18385 output_asm_insn ("str%?\t%1, %0", operands);
18386 output_asm_insn ("str%?\t%H1, %0", otherops);
18387 }
18388 if (count)
18389 *count = 2;
18390 }
18391 }
18392
18393 return "";
18394 }
18395
18396 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18397 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18398
18399 const char *
18400 output_move_quad (rtx *operands)
18401 {
18402 if (REG_P (operands[0]))
18403 {
18404 /* Load, or reg->reg move. */
18405
18406 if (MEM_P (operands[1]))
18407 {
18408 switch (GET_CODE (XEXP (operands[1], 0)))
18409 {
18410 case REG:
18411 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18412 break;
18413
18414 case LABEL_REF:
18415 case CONST:
18416 output_asm_insn ("adr%?\t%0, %1", operands);
18417 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18418 break;
18419
18420 default:
18421 gcc_unreachable ();
18422 }
18423 }
18424 else
18425 {
18426 rtx ops[2];
18427 int dest, src, i;
18428
18429 gcc_assert (REG_P (operands[1]));
18430
18431 dest = REGNO (operands[0]);
18432 src = REGNO (operands[1]);
18433
18434 /* This seems pretty dumb, but hopefully GCC won't try to do it
18435 very often. */
18436 if (dest < src)
18437 for (i = 0; i < 4; i++)
18438 {
18439 ops[0] = gen_rtx_REG (SImode, dest + i);
18440 ops[1] = gen_rtx_REG (SImode, src + i);
18441 output_asm_insn ("mov%?\t%0, %1", ops);
18442 }
18443 else
18444 for (i = 3; i >= 0; i--)
18445 {
18446 ops[0] = gen_rtx_REG (SImode, dest + i);
18447 ops[1] = gen_rtx_REG (SImode, src + i);
18448 output_asm_insn ("mov%?\t%0, %1", ops);
18449 }
18450 }
18451 }
18452 else
18453 {
18454 gcc_assert (MEM_P (operands[0]));
18455 gcc_assert (REG_P (operands[1]));
18456 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18457
18458 switch (GET_CODE (XEXP (operands[0], 0)))
18459 {
18460 case REG:
18461 output_asm_insn ("stm%?\t%m0, %M1", operands);
18462 break;
18463
18464 default:
18465 gcc_unreachable ();
18466 }
18467 }
18468
18469 return "";
18470 }
18471
18472 /* Output a VFP load or store instruction. */
18473
18474 const char *
18475 output_move_vfp (rtx *operands)
18476 {
18477 rtx reg, mem, addr, ops[2];
18478 int load = REG_P (operands[0]);
18479 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18480 int sp = (!TARGET_VFP_FP16INST
18481 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18482 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18483 const char *templ;
18484 char buff[50];
18485 machine_mode mode;
18486
18487 reg = operands[!load];
18488 mem = operands[load];
18489
18490 mode = GET_MODE (reg);
18491
18492 gcc_assert (REG_P (reg));
18493 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18494 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18495 || mode == SFmode
18496 || mode == DFmode
18497 || mode == HImode
18498 || mode == SImode
18499 || mode == DImode
18500 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18501 gcc_assert (MEM_P (mem));
18502
18503 addr = XEXP (mem, 0);
18504
18505 switch (GET_CODE (addr))
18506 {
18507 case PRE_DEC:
18508 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18509 ops[0] = XEXP (addr, 0);
18510 ops[1] = reg;
18511 break;
18512
18513 case POST_INC:
18514 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18515 ops[0] = XEXP (addr, 0);
18516 ops[1] = reg;
18517 break;
18518
18519 default:
18520 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18521 ops[0] = reg;
18522 ops[1] = mem;
18523 break;
18524 }
18525
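/* Fill in the size and operand-prefix fields: e.g. a DFmode load from a
   simple address expands to "vldr%?.64\t%P0, %1".  */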
18526 sprintf (buff, templ,
18527 load ? "ld" : "st",
18528 dp ? "64" : sp ? "32" : "16",
18529 dp ? "P" : "",
18530 integer_p ? "\t%@ int" : "");
18531 output_asm_insn (buff, ops);
18532
18533 return "";
18534 }
18535
18536 /* Output a Neon double-word or quad-word load or store, or a load
18537 or store for larger structure modes.
18538
18539 WARNING: The ordering of elements is weird in big-endian mode,
18540 because the EABI requires that vectors stored in memory appear
18541 as though they were stored by a VSTM.
18542 GCC RTL defines element ordering based on in-memory order.
18543 This can be different from the architectural ordering of elements
18544 within a NEON register. The intrinsics defined in arm_neon.h use the
18545 NEON register element ordering, not the GCC RTL element ordering.
18546
18547 For example, the in-memory ordering of a big-endian quadword
18548 vector with 16-bit elements when stored from register pair {d0,d1}
18549 will be (lowest address first, d0[N] is NEON register element N):
18550
18551 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18552
18553 When necessary, quadword registers (dN, dN+1) are moved to ARM
18554 registers starting at rN, in the order:
18555
18556 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18557
18558 This allows STM/LDM to be used on vectors in ARM registers, and the
18559 same memory layout results as if VSTM/VLDM were used.
18560
18561 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18562 possible, which allows use of appropriate alignment tags.
18563 Note that the choice of "64" is independent of the actual vector
18564 element size; this size simply ensures that the behavior is
18565 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18566
18567 Due to limitations of those instructions, use of VST1.64/VLD1.64
18568 is not possible if:
18569 - the address contains PRE_DEC, or
18570 - the mode refers to more than 4 double-word registers
18571
18572 In those cases, it would be possible to replace VSTM/VLDM by a
18573 sequence of instructions; this is not currently implemented since
18574 this is not certain to actually improve performance. */
18575
18576 const char *
18577 output_move_neon (rtx *operands)
18578 {
18579 rtx reg, mem, addr, ops[2];
18580 int regno, nregs, load = REG_P (operands[0]);
18581 const char *templ;
18582 char buff[50];
18583 machine_mode mode;
18584
18585 reg = operands[!load];
18586 mem = operands[load];
18587
18588 mode = GET_MODE (reg);
18589
18590 gcc_assert (REG_P (reg));
18591 regno = REGNO (reg);
18592 nregs = REG_NREGS (reg) / 2;
18593 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18594 || NEON_REGNO_OK_FOR_QUAD (regno));
18595 gcc_assert (VALID_NEON_DREG_MODE (mode)
18596 || VALID_NEON_QREG_MODE (mode)
18597 || VALID_NEON_STRUCT_MODE (mode));
18598 gcc_assert (MEM_P (mem));
18599
18600 addr = XEXP (mem, 0);
18601
18602 /* Strip off const from addresses like (const (plus (...))). */
18603 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18604 addr = XEXP (addr, 0);
18605
18606 switch (GET_CODE (addr))
18607 {
18608 case POST_INC:
18609 /* We have to use vldm / vstm for too-large modes. */
18610 if (nregs > 4)
18611 {
18612 templ = "v%smia%%?\t%%0!, %%h1";
18613 ops[0] = XEXP (addr, 0);
18614 }
18615 else
18616 {
18617 templ = "v%s1.64\t%%h1, %%A0";
18618 ops[0] = mem;
18619 }
18620 ops[1] = reg;
18621 break;
18622
18623 case PRE_DEC:
18624 /* We have to use vldm / vstm in this case, since there is no
18625 pre-decrement form of the vld1 / vst1 instructions. */
18626 templ = "v%smdb%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18628 ops[1] = reg;
18629 break;
18630
18631 case POST_MODIFY:
18632 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18633 gcc_unreachable ();
18634
18635 case REG:
18636 /* We have to use vldm / vstm for too-large modes. */
18637 if (nregs > 1)
18638 {
18639 if (nregs > 4)
18640 templ = "v%smia%%?\t%%m0, %%h1";
18641 else
18642 templ = "v%s1.64\t%%h1, %%A0";
18643
18644 ops[0] = mem;
18645 ops[1] = reg;
18646 break;
18647 }
18648 /* Fall through. */
18649 case LABEL_REF:
18650 case PLUS:
18651 {
18652 int i;
18653 int overlap = -1;
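/* Emit one vldr/vstr per D register.  If one of the transfer registers
   overlaps a register used in the address, that transfer is deferred
   until last; for a load this keeps the base address intact for the
   earlier transfers.  */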
18654 for (i = 0; i < nregs; i++)
18655 {
18656 /* We're only using DImode here because it's a convenient size. */
18657 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18658 ops[1] = adjust_address (mem, DImode, 8 * i);
18659 if (reg_overlap_mentioned_p (ops[0], mem))
18660 {
18661 gcc_assert (overlap == -1);
18662 overlap = i;
18663 }
18664 else
18665 {
18666 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18667 output_asm_insn (buff, ops);
18668 }
18669 }
18670 if (overlap != -1)
18671 {
18672 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18673 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18674 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18675 output_asm_insn (buff, ops);
18676 }
18677
18678 return "";
18679 }
18680
18681 default:
18682 gcc_unreachable ();
18683 }
18684
18685 sprintf (buff, templ, load ? "ld" : "st");
18686 output_asm_insn (buff, ops);
18687
18688 return "";
18689 }
18690
18691 /* Compute and return the length of neon_mov<mode>, where <mode> is
18692 one of VSTRUCT modes: EI, OI, CI or XI. */
18693 int
18694 arm_attr_length_move_neon (rtx_insn *insn)
18695 {
18696 rtx reg, mem, addr;
18697 int load;
18698 machine_mode mode;
18699
18700 extract_insn_cached (insn);
18701
18702 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18703 {
18704 mode = GET_MODE (recog_data.operand[0]);
18705 switch (mode)
18706 {
18707 case E_EImode:
18708 case E_OImode:
18709 return 8;
18710 case E_CImode:
18711 return 12;
18712 case E_XImode:
18713 return 16;
18714 default:
18715 gcc_unreachable ();
18716 }
18717 }
18718
18719 load = REG_P (recog_data.operand[0]);
18720 reg = recog_data.operand[!load];
18721 mem = recog_data.operand[load];
18722
18723 gcc_assert (MEM_P (mem));
18724
18725 addr = XEXP (mem, 0);
18726
18727 /* Strip off const from addresses like (const (plus (...))). */
18728 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18729 addr = XEXP (addr, 0);
18730
18731 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18732 {
18733 int insns = REG_NREGS (reg) / 2;
18734 return insns * 4;
18735 }
18736 else
18737 return 4;
18738 }
18739
18740 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18741 return zero. */
18742
18743 int
18744 arm_address_offset_is_imm (rtx_insn *insn)
18745 {
18746 rtx mem, addr;
18747
18748 extract_insn_cached (insn);
18749
18750 if (REG_P (recog_data.operand[0]))
18751 return 0;
18752
18753 mem = recog_data.operand[0];
18754
18755 gcc_assert (MEM_P (mem));
18756
18757 addr = XEXP (mem, 0);
18758
18759 if (REG_P (addr)
18760 || (GET_CODE (addr) == PLUS
18761 && REG_P (XEXP (addr, 0))
18762 && CONST_INT_P (XEXP (addr, 1))))
18763 return 1;
18764 else
18765 return 0;
18766 }
18767
18768 /* Output an ADD r, s, #n where n may be too big for one instruction.
18769 If adding zero to one register, output nothing. */
18770 const char *
18771 output_add_immediate (rtx *operands)
18772 {
18773 HOST_WIDE_INT n = INTVAL (operands[2]);
18774
18775 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18776 {
18777 if (n < 0)
18778 output_multi_immediate (operands,
18779 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18780 -n);
18781 else
18782 output_multi_immediate (operands,
18783 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18784 n);
18785 }
18786
18787 return "";
18788 }
18789
18790 /* Output a multiple immediate operation.
18791 OPERANDS is the vector of operands referred to in the output patterns.
18792 INSTR1 is the output pattern to use for the first constant.
18793 INSTR2 is the output pattern to use for subsequent constants.
18794 IMMED_OP is the index of the constant slot in OPERANDS.
18795 N is the constant value. */
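/* For example, N = 0x10004 cannot be encoded as a single rotated 8-bit
   immediate, so it is split into the two immediates #4 and #65536,
   emitted with INSTR1 and INSTR2 respectively.  */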
18796 static const char *
18797 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18798 int immed_op, HOST_WIDE_INT n)
18799 {
18800 #if HOST_BITS_PER_WIDE_INT > 32
18801 n &= 0xffffffff;
18802 #endif
18803
18804 if (n == 0)
18805 {
18806 /* Quick and easy output. */
18807 operands[immed_op] = const0_rtx;
18808 output_asm_insn (instr1, operands);
18809 }
18810 else
18811 {
18812 int i;
18813 const char * instr = instr1;
18814
18815 /* Note that n is never zero here (which would give no output). */
18816 for (i = 0; i < 32; i += 2)
18817 {
18818 if (n & (3 << i))
18819 {
18820 operands[immed_op] = GEN_INT (n & (255 << i));
18821 output_asm_insn (instr, operands);
18822 instr = instr2;
18823 i += 6;
18824 }
18825 }
18826 }
18827
18828 return "";
18829 }
18830
18831 /* Return the name of a shifter operation. */
18832 static const char *
18833 arm_shift_nmem(enum rtx_code code)
18834 {
18835 switch (code)
18836 {
18837 case ASHIFT:
18838 return ARM_LSL_NAME;
18839
18840 case ASHIFTRT:
18841 return "asr";
18842
18843 case LSHIFTRT:
18844 return "lsr";
18845
18846 case ROTATERT:
18847 return "ror";
18848
18849 default:
18850 abort();
18851 }
18852 }
18853
18854 /* Return the appropriate ARM instruction for the operation code.
18855 The returned result should not be overwritten. OP is the rtx of the
18856 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18857 was shifted. */
18858 const char *
18859 arithmetic_instr (rtx op, int shift_first_arg)
18860 {
18861 switch (GET_CODE (op))
18862 {
18863 case PLUS:
18864 return "add";
18865
18866 case MINUS:
18867 return shift_first_arg ? "rsb" : "sub";
18868
18869 case IOR:
18870 return "orr";
18871
18872 case XOR:
18873 return "eor";
18874
18875 case AND:
18876 return "and";
18877
18878 case ASHIFT:
18879 case ASHIFTRT:
18880 case LSHIFTRT:
18881 case ROTATERT:
18882 return arm_shift_nmem(GET_CODE(op));
18883
18884 default:
18885 gcc_unreachable ();
18886 }
18887 }
18888
18889 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18890 for the operation code. The returned result should not be overwritten.
18891 OP is the rtx of the shift.
18892 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
18893 shift amount otherwise.  */
18894 static const char *
18895 shift_op (rtx op, HOST_WIDE_INT *amountp)
18896 {
18897 const char * mnem;
18898 enum rtx_code code = GET_CODE (op);
18899
18900 switch (code)
18901 {
18902 case ROTATE:
18903 if (!CONST_INT_P (XEXP (op, 1)))
18904 {
18905 output_operand_lossage ("invalid shift operand");
18906 return NULL;
18907 }
18908
18909 code = ROTATERT;
18910 *amountp = 32 - INTVAL (XEXP (op, 1));
18911 mnem = "ror";
18912 break;
18913
18914 case ASHIFT:
18915 case ASHIFTRT:
18916 case LSHIFTRT:
18917 case ROTATERT:
18918 mnem = arm_shift_nmem(code);
18919 if (CONST_INT_P (XEXP (op, 1)))
18920 {
18921 *amountp = INTVAL (XEXP (op, 1));
18922 }
18923 else if (REG_P (XEXP (op, 1)))
18924 {
18925 *amountp = -1;
18926 return mnem;
18927 }
18928 else
18929 {
18930 output_operand_lossage ("invalid shift operand");
18931 return NULL;
18932 }
18933 break;
18934
18935 case MULT:
18936 /* We never have to worry about the amount being other than a
18937 power of 2, since this case can never be reloaded from a reg. */
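/* A multiply by 1 << N is emitted as an LSL by N; e.g. a multiply
   by 8 is output as LSL #3.  */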
18938 if (!CONST_INT_P (XEXP (op, 1)))
18939 {
18940 output_operand_lossage ("invalid shift operand");
18941 return NULL;
18942 }
18943
18944 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18945
18946 /* Amount must be a power of two. */
18947 if (*amountp & (*amountp - 1))
18948 {
18949 output_operand_lossage ("invalid shift operand");
18950 return NULL;
18951 }
18952
18953 *amountp = exact_log2 (*amountp);
18954 gcc_assert (IN_RANGE (*amountp, 0, 31));
18955 return ARM_LSL_NAME;
18956
18957 default:
18958 output_operand_lossage ("invalid shift operand");
18959 return NULL;
18960 }
18961
18962 /* This is not 100% correct, but follows from the desire to merge
18963 multiplication by a power of 2 with the recognizer for a
18964 shift. >=32 is not a valid shift for "lsl", so we must try and
18965 output a shift that produces the correct arithmetical result.
18966 Using lsr #32 is identical except for the fact that the carry bit
18967 is not set correctly if we set the flags; but we never use the
18968 carry bit from such an operation, so we can ignore that. */
18969 if (code == ROTATERT)
18970 /* Rotate is just modulo 32. */
18971 *amountp &= 31;
18972 else if (*amountp != (*amountp & 31))
18973 {
18974 if (code == ASHIFT)
18975 mnem = "lsr";
18976 *amountp = 32;
18977 }
18978
18979 /* Shifts of 0 are no-ops. */
18980 if (*amountp == 0)
18981 return NULL;
18982
18983 return mnem;
18984 }
18985
18986 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18987 because /bin/as is horribly restrictive. The judgement about
18988 whether or not each character is 'printable' (and can be output as
18989 is) or not (and must be printed with an octal escape) must be made
18990 with reference to the *host* character set -- the situation is
18991 similar to that discussed in the comments above pp_c_char in
18992 c-pretty-print.c. */
18993
18994 #define MAX_ASCII_LEN 51
18995
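/* For example, the three bytes 'a', '"', 0 are emitted as .ascii "a\"\000",
   and a new .ascii directive is started whenever a line reaches
   MAX_ASCII_LEN characters.  */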
18996 void
18997 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18998 {
18999 int i;
19000 int len_so_far = 0;
19001
19002 fputs ("\t.ascii\t\"", stream);
19003
19004 for (i = 0; i < len; i++)
19005 {
19006 int c = p[i];
19007
19008 if (len_so_far >= MAX_ASCII_LEN)
19009 {
19010 fputs ("\"\n\t.ascii\t\"", stream);
19011 len_so_far = 0;
19012 }
19013
19014 if (ISPRINT (c))
19015 {
19016 if (c == '\\' || c == '\"')
19017 {
19018 putc ('\\', stream);
19019 len_so_far++;
19020 }
19021 putc (c, stream);
19022 len_so_far++;
19023 }
19024 else
19025 {
19026 fprintf (stream, "\\%03o", c);
19027 len_so_far += 4;
19028 }
19029 }
19030
19031 fputs ("\"\n", stream);
19032 }
19033 \f
19034 /* Whether a register is callee saved or not.  This is necessary because,
19035 when optimizing for size on Thumb-1 targets, high registers are marked as
19036 caller saved despite being callee saved, in order to avoid using them.  */
19037 #define callee_saved_reg_p(reg) \
19038 (!call_used_regs[reg] \
19039 || (TARGET_THUMB1 && optimize_size \
19040 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19041
19042 /* Compute the register save mask for registers 0 through 12
19043 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19044
19045 static unsigned long
19046 arm_compute_save_reg0_reg12_mask (void)
19047 {
19048 unsigned long func_type = arm_current_func_type ();
19049 unsigned long save_reg_mask = 0;
19050 unsigned int reg;
19051
19052 if (IS_INTERRUPT (func_type))
19053 {
19054 unsigned int max_reg;
19055 /* Interrupt functions must not corrupt any registers,
19056 even call clobbered ones. If this is a leaf function
19057 we can just examine the registers used by the RTL, but
19058 otherwise we have to assume that whatever function is
19059 called might clobber anything, and so we have to save
19060 all the call-clobbered registers as well. */
19061 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19062 /* FIQ handlers have registers r8 - r12 banked, so
19063 we only need to check r0 - r7.  Normal ISRs only
19064 bank r14 and r15, so we must check up to r12.
19065 r13 is the stack pointer which is always preserved,
19066 so we do not need to consider it here. */
19067 max_reg = 7;
19068 else
19069 max_reg = 12;
19070
19071 for (reg = 0; reg <= max_reg; reg++)
19072 if (df_regs_ever_live_p (reg)
19073 || (! crtl->is_leaf && call_used_regs[reg]))
19074 save_reg_mask |= (1 << reg);
19075
19076 /* Also save the pic base register if necessary. */
19077 if (flag_pic
19078 && !TARGET_SINGLE_PIC_BASE
19079 && arm_pic_register != INVALID_REGNUM
19080 && crtl->uses_pic_offset_table)
19081 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19082 }
19083 else if (IS_VOLATILE(func_type))
19084 {
19085 /* For noreturn functions we historically omitted register saves
19086 altogether. However this really messes up debugging. As a
19087 compromise save just the frame pointers. Combined with the link
19088 register saved elsewhere this should be sufficient to get
19089 a backtrace. */
19090 if (frame_pointer_needed)
19091 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19092 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19093 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19094 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19095 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19096 }
19097 else
19098 {
19099 /* In the normal case we only need to save those registers
19100 which are call saved and which are used by this function. */
19101 for (reg = 0; reg <= 11; reg++)
19102 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19103 save_reg_mask |= (1 << reg);
19104
19105 /* Handle the frame pointer as a special case. */
19106 if (frame_pointer_needed)
19107 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19108
19109 /* If we aren't loading the PIC register,
19110 don't stack it even though it may be live. */
19111 if (flag_pic
19112 && !TARGET_SINGLE_PIC_BASE
19113 && arm_pic_register != INVALID_REGNUM
19114 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19115 || crtl->uses_pic_offset_table))
19116 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19117
19118 /* The prologue will copy SP into R0, so save it. */
19119 if (IS_STACKALIGN (func_type))
19120 save_reg_mask |= 1;
19121 }
19122
19123 /* Save registers so the exception handler can modify them. */
19124 if (crtl->calls_eh_return)
19125 {
19126 unsigned int i;
19127
19128 for (i = 0; ; i++)
19129 {
19130 reg = EH_RETURN_DATA_REGNO (i);
19131 if (reg == INVALID_REGNUM)
19132 break;
19133 save_reg_mask |= 1 << reg;
19134 }
19135 }
19136
19137 return save_reg_mask;
19138 }
19139
19140 /* Return true if r3 is live at the start of the function. */
19141
19142 static bool
19143 arm_r3_live_at_start_p (void)
19144 {
19145 /* Just look at cfg info, which is still close enough to correct at this
19146 point. This gives false positives for broken functions that might use
19147 uninitialized data that happens to be allocated in r3, but who cares? */
19148 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19149 }
19150
19151 /* Compute the number of bytes used to store the static chain register on the
19152 stack, above the stack frame. We need to know this accurately to get the
19153 alignment of the rest of the stack frame correct. */
19154
19155 static int
19156 arm_compute_static_chain_stack_bytes (void)
19157 {
19158 /* See the defining assertion in arm_expand_prologue. */
19159 if (IS_NESTED (arm_current_func_type ())
19160 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19161 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19162 && !df_regs_ever_live_p (LR_REGNUM)))
19163 && arm_r3_live_at_start_p ()
19164 && crtl->args.pretend_args_size == 0)
19165 return 4;
19166
19167 return 0;
19168 }
19169
19170 /* Compute a bit mask of which core registers need to be
19171 saved on the stack for the current function.
19172 This is used by arm_compute_frame_layout, which may add extra registers. */
19173
19174 static unsigned long
19175 arm_compute_save_core_reg_mask (void)
19176 {
19177 unsigned int save_reg_mask = 0;
19178 unsigned long func_type = arm_current_func_type ();
19179 unsigned int reg;
19180
19181 if (IS_NAKED (func_type))
19182 /* This should never really happen. */
19183 return 0;
19184
19185 /* If we are creating a stack frame, then we must save the frame pointer,
19186 IP (which will hold the old stack pointer), LR and the PC. */
19187 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19188 save_reg_mask |=
19189 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19190 | (1 << IP_REGNUM)
19191 | (1 << LR_REGNUM)
19192 | (1 << PC_REGNUM);
19193
19194 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19195
19196 /* Decide if we need to save the link register.
19197 Interrupt routines have their own banked link register,
19198 so they never need to save it.
19199 Otherwise if we do not use the link register we do not need to save
19200 it. If we are pushing other registers onto the stack however, we
19201 can save an instruction in the epilogue by pushing the link register
19202 now and then popping it back into the PC. This incurs extra memory
19203 accesses though, so we only do it when optimizing for size, and only
19204 if we know that we will not need a fancy return sequence. */
19205 if (df_regs_ever_live_p (LR_REGNUM)
19206 || (save_reg_mask
19207 && optimize_size
19208 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19209 && !crtl->tail_call_emit
19210 && !crtl->calls_eh_return))
19211 save_reg_mask |= 1 << LR_REGNUM;
19212
19213 if (cfun->machine->lr_save_eliminated)
19214 save_reg_mask &= ~ (1 << LR_REGNUM);
19215
19216 if (TARGET_REALLY_IWMMXT
19217 && ((bit_count (save_reg_mask)
19218 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19219 arm_compute_static_chain_stack_bytes())
19220 ) % 2) != 0)
19221 {
19222 /* The total number of registers that are going to be pushed
19223 onto the stack is odd. We need to ensure that the stack
19224 is 64-bit aligned before we start to save iWMMXt registers,
19225 and also before we start to create locals. (A local variable
19226 might be a double or long long which we will load/store using
19227 an iWMMXt instruction). Therefore we need to push another
19228 ARM register, so that the stack will be 64-bit aligned. We
19229 try to avoid using the arg registers (r0 - r3) as they might be
19230 used to pass values in a tail call. */
19231 for (reg = 4; reg <= 12; reg++)
19232 if ((save_reg_mask & (1 << reg)) == 0)
19233 break;
19234
19235 if (reg <= 12)
19236 save_reg_mask |= (1 << reg);
19237 else
19238 {
19239 cfun->machine->sibcall_blocked = 1;
19240 save_reg_mask |= (1 << 3);
19241 }
19242 }
19243
19244 /* We may need to push an additional register for use initializing the
19245 PIC base register. */
19246 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19247 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19248 {
19249 reg = thumb_find_work_register (1 << 4);
19250 if (!call_used_regs[reg])
19251 save_reg_mask |= (1 << reg);
19252 }
19253
19254 return save_reg_mask;
19255 }
19256
19257 /* Compute a bit mask of which core registers need to be
19258 saved on the stack for the current function. */
19259 static unsigned long
19260 thumb1_compute_save_core_reg_mask (void)
19261 {
19262 unsigned long mask;
19263 unsigned reg;
19264
19265 mask = 0;
19266 for (reg = 0; reg < 12; reg ++)
19267 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19268 mask |= 1 << reg;
19269
19270 /* Handle the frame pointer as a special case. */
19271 if (frame_pointer_needed)
19272 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19273
19274 if (flag_pic
19275 && !TARGET_SINGLE_PIC_BASE
19276 && arm_pic_register != INVALID_REGNUM
19277 && crtl->uses_pic_offset_table)
19278 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19279
19280 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19281 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19282 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19283
19284 /* LR will also be pushed if any lo regs are pushed. */
19285 if (mask & 0xff || thumb_force_lr_save ())
19286 mask |= (1 << LR_REGNUM);
19287
19288 /* Make sure we have a low work register if we need one.
19289 We will need one if we are going to push a high register,
19290 but we are not currently intending to push a low register. */
19291 if ((mask & 0xff) == 0
19292 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19293 {
19294 /* Use thumb_find_work_register to choose which register
19295 we will use. If the register is live then we will
19296 have to push it. Use LAST_LO_REGNUM as our fallback
19297 choice for the register to select. */
19298 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19299 /* Make sure the register returned by thumb_find_work_register is
19300 not part of the return value. */
19301 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19302 reg = LAST_LO_REGNUM;
19303
19304 if (callee_saved_reg_p (reg))
19305 mask |= 1 << reg;
19306 }
19307
19308 /* The 504 below is 8 bytes less than 512 because there are two possible
19309 alignment words. We can't tell here if they will be present or not so we
19310 have to play it safe and assume that they are. */
19311 if ((CALLER_INTERWORKING_SLOT_SIZE +
19312 ROUND_UP_WORD (get_frame_size ()) +
19313 crtl->outgoing_args_size) >= 504)
19314 {
19315 /* This is the same as the code in thumb1_expand_prologue() which
19316 determines which register to use for stack decrement. */
19317 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19318 if (mask & (1 << reg))
19319 break;
19320
19321 if (reg > LAST_LO_REGNUM)
19322 {
19323 /* Make sure we have a register available for stack decrement. */
19324 mask |= 1 << LAST_LO_REGNUM;
19325 }
19326 }
19327
19328 return mask;
19329 }
19330
19331
19332 /* Return the number of bytes required to save VFP registers. */
19333 static int
19334 arm_get_vfp_saved_size (void)
19335 {
19336 unsigned int regno;
19337 int count;
19338 int saved;
19339
19340 saved = 0;
19341 /* Space for saved VFP registers. */
19342 if (TARGET_HARD_FLOAT)
19343 {
19344 count = 0;
19345 for (regno = FIRST_VFP_REGNUM;
19346 regno < LAST_VFP_REGNUM;
19347 regno += 2)
19348 {
19349 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19350 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19351 {
19352 if (count > 0)
19353 {
19354 /* Workaround ARM10 VFPr1 bug. */
19355 if (count == 2 && !arm_arch6)
19356 count++;
19357 saved += count * 8;
19358 }
19359 count = 0;
19360 }
19361 else
19362 count++;
19363 }
19364 if (count > 0)
19365 {
19366 if (count == 2 && !arm_arch6)
19367 count++;
19368 saved += count * 8;
19369 }
19370 }
19371 return saved;
19372 }
19373
19374
19375 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19376 everything bar the final return instruction. If simple_return is true,
19377 then do not output epilogue, because it has already been emitted in RTL. */
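/* OPERAND gives the condition for a conditional return; REVERSE selects
   the inverse of that condition when building the conditional suffix.  */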
19378 const char *
19379 output_return_instruction (rtx operand, bool really_return, bool reverse,
19380 bool simple_return)
19381 {
19382 char conditional[10];
19383 char instr[100];
19384 unsigned reg;
19385 unsigned long live_regs_mask;
19386 unsigned long func_type;
19387 arm_stack_offsets *offsets;
19388
19389 func_type = arm_current_func_type ();
19390
19391 if (IS_NAKED (func_type))
19392 return "";
19393
19394 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19395 {
19396 /* If this function was declared non-returning, and we have
19397 found a tail call, then we have to trust that the called
19398 function won't return. */
19399 if (really_return)
19400 {
19401 rtx ops[2];
19402
19403 /* Otherwise, trap an attempted return by aborting. */
19404 ops[0] = operand;
19405 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19406 : "abort");
19407 assemble_external_libcall (ops[1]);
19408 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19409 }
19410
19411 return "";
19412 }
19413
19414 gcc_assert (!cfun->calls_alloca || really_return);
19415
19416 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19417
19418 cfun->machine->return_used_this_function = 1;
19419
19420 offsets = arm_get_frame_offsets ();
19421 live_regs_mask = offsets->saved_regs_mask;
19422
19423 if (!simple_return && live_regs_mask)
19424 {
19425 const char * return_reg;
19426
19427 /* If we do not have any special requirements for function exit
19428 (e.g. interworking) then we can load the return address
19429 directly into the PC. Otherwise we must load it into LR. */
19430 if (really_return
19431 && !IS_CMSE_ENTRY (func_type)
19432 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19433 return_reg = reg_names[PC_REGNUM];
19434 else
19435 return_reg = reg_names[LR_REGNUM];
19436
19437 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19438 {
19439 /* There are three possible reasons for the IP register
19440 being saved: 1) a stack frame was created, in which case
19441 IP contains the old stack pointer; 2) an ISR routine
19442 corrupted it; or 3) it was saved to align the stack on
19443 iWMMXt.  In case 1, restore IP into SP, otherwise just
19444 restore IP.  */
19445 if (frame_pointer_needed)
19446 {
19447 live_regs_mask &= ~ (1 << IP_REGNUM);
19448 live_regs_mask |= (1 << SP_REGNUM);
19449 }
19450 else
19451 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19452 }
19453
19454 /* On some ARM architectures it is faster to use LDR rather than
19455 LDM to load a single register. On other architectures, the
19456 cost is the same. In 26 bit mode, or for exception handlers,
19457 we have to use LDM to load the PC so that the CPSR is also
19458 restored. */
19459 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19460 if (live_regs_mask == (1U << reg))
19461 break;
19462
19463 if (reg <= LAST_ARM_REGNUM
19464 && (reg != LR_REGNUM
19465 || ! really_return
19466 || ! IS_INTERRUPT (func_type)))
19467 {
19468 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19469 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19470 }
19471 else
19472 {
19473 char *p;
19474 int first = 1;
19475
19476 /* Generate the load multiple instruction to restore the
19477 registers. Note we can get here, even if
19478 frame_pointer_needed is true, but only if sp already
19479 points to the base of the saved core registers. */
19480 if (live_regs_mask & (1 << SP_REGNUM))
19481 {
19482 unsigned HOST_WIDE_INT stack_adjust;
19483
19484 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19485 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19486
19487 if (stack_adjust && arm_arch5 && TARGET_ARM)
19488 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19489 else
19490 {
19491 /* If we can't use ldmib (SA110 bug),
19492 then try to pop r3 instead. */
19493 if (stack_adjust)
19494 live_regs_mask |= 1 << 3;
19495
19496 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19497 }
19498 }
19499 /* For interrupt returns we have to use an LDM rather than
19500 a POP so that we can use the exception return variant. */
19501 else if (IS_INTERRUPT (func_type))
19502 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19503 else
19504 sprintf (instr, "pop%s\t{", conditional);
19505
19506 p = instr + strlen (instr);
19507
19508 for (reg = 0; reg <= SP_REGNUM; reg++)
19509 if (live_regs_mask & (1 << reg))
19510 {
19511 int l = strlen (reg_names[reg]);
19512
19513 if (first)
19514 first = 0;
19515 else
19516 {
19517 memcpy (p, ", ", 2);
19518 p += 2;
19519 }
19520
19521 memcpy (p, "%|", 2);
19522 memcpy (p + 2, reg_names[reg], l);
19523 p += l + 2;
19524 }
19525
19526 if (live_regs_mask & (1 << LR_REGNUM))
19527 {
19528 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19529 /* If returning from an interrupt, restore the CPSR. */
19530 if (IS_INTERRUPT (func_type))
19531 strcat (p, "^");
19532 }
19533 else
19534 strcpy (p, "}");
19535 }
19536
19537 output_asm_insn (instr, & operand);
19538
19539 /* See if we need to generate an extra instruction to
19540 perform the actual function return. */
19541 if (really_return
19542 && func_type != ARM_FT_INTERWORKED
19543 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19544 {
19545 /* The return has already been handled
19546 by loading the LR into the PC. */
19547 return "";
19548 }
19549 }
19550
19551 if (really_return)
19552 {
19553 switch ((int) ARM_FUNC_TYPE (func_type))
19554 {
19555 case ARM_FT_ISR:
19556 case ARM_FT_FIQ:
19557 /* ??? This is wrong for unified assembly syntax. */
19558 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19559 break;
19560
19561 case ARM_FT_INTERWORKED:
19562 gcc_assert (arm_arch5 || arm_arch4t);
19563 sprintf (instr, "bx%s\t%%|lr", conditional);
19564 break;
19565
19566 case ARM_FT_EXCEPTION:
19567 /* ??? This is wrong for unified assembly syntax. */
19568 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19569 break;
19570
19571 default:
19572 if (IS_CMSE_ENTRY (func_type))
19573 {
19574 /* Check if we have to clear the 'GE bits' which is only used if
19575 parallel add and subtraction instructions are available. */
19576 if (TARGET_INT_SIMD)
19577 snprintf (instr, sizeof (instr),
19578 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19579 else
19580 snprintf (instr, sizeof (instr),
19581 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19582
19583 output_asm_insn (instr, & operand);
19584 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19585 {
19586 /* Clear the cumulative exception-status bits (0-4,7) and the
19587 condition code bits (28-31) of the FPSCR. We need to
19588 remember to clear the first scratch register used (IP) and
19589 save and restore the second (r4). */
19590 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19591 output_asm_insn (instr, & operand);
19592 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19593 output_asm_insn (instr, & operand);
19594 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19595 output_asm_insn (instr, & operand);
19596 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19597 output_asm_insn (instr, & operand);
19598 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19599 output_asm_insn (instr, & operand);
19600 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19601 output_asm_insn (instr, & operand);
19602 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19603 output_asm_insn (instr, & operand);
19604 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19605 output_asm_insn (instr, & operand);
19606 }
19607 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19608 }
19609 /* Use bx if it's available. */
19610 else if (arm_arch5 || arm_arch4t)
19611 sprintf (instr, "bx%s\t%%|lr", conditional);
19612 else
19613 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19614 break;
19615 }
19616
19617 output_asm_insn (instr, & operand);
19618 }
19619
19620 return "";
19621 }
19622
19623 /* Output in FILE asm statements needed to declare the NAME of the function
19624 defined by its DECL node. */
19625
19626 void
19627 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19628 {
19629 size_t cmse_name_len;
19630 char *cmse_name = 0;
19631 char cmse_prefix[] = "__acle_se_";
19632
19633 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19634 extra function label for each function with the 'cmse_nonsecure_entry'
19635 attribute. This extra function label should be prepended with
19636 '__acle_se_', telling the linker that it needs to create secure gateway
19637 veneers for this function. */
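/* For example, a function "foo" with that attribute gets an additional
   global label "__acle_se_foo" alongside its normal one.  */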
19638 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19639 DECL_ATTRIBUTES (decl)))
19640 {
19641 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19642 cmse_name = XALLOCAVEC (char, cmse_name_len);
19643 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19644 targetm.asm_out.globalize_label (file, cmse_name);
19645
19646 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19647 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19648 }
19649
19650 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19651 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19652 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19653 ASM_OUTPUT_LABEL (file, name);
19654
19655 if (cmse_name)
19656 ASM_OUTPUT_LABEL (file, cmse_name);
19657
19658 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19659 }
19660
19661 /* Write the function name into the code section, directly preceding
19662 the function prologue.
19663
19664 Code will be output similar to this:
19665 t0
19666 .ascii "arm_poke_function_name", 0
19667 .align
19668 t1
19669 .word 0xff000000 + (t1 - t0)
19670 arm_poke_function_name
19671 mov ip, sp
19672 stmfd sp!, {fp, ip, lr, pc}
19673 sub fp, ip, #4
19674
19675 When performing a stack backtrace, code can inspect the value
19676 of 'pc' stored at 'fp' + 0. If the trace function then looks
19677 at location pc - 12 and the top 8 bits are set, then we know
19678 that there is a function name embedded immediately preceding this
19679 location, and that its length is ((pc[-3]) & ~0xff000000).
19680
19681 We assume that pc is declared as a pointer to an unsigned long.
19682
19683 It is of no benefit to output the function name if we are assembling
19684 a leaf function. These function types will not contain a stack
19685 backtrace structure, therefore it is not possible to determine the
19686 function name. */
19687 void
19688 arm_poke_function_name (FILE *stream, const char *name)
19689 {
19690 unsigned long alignlength;
19691 unsigned long length;
19692 rtx x;
19693
19694 length = strlen (name) + 1;
19695 alignlength = ROUND_UP_WORD (length);
19696
19697 ASM_OUTPUT_ASCII (stream, name, length);
19698 ASM_OUTPUT_ALIGN (stream, 2);
19699 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19700 assemble_aligned_integer (UNITS_PER_WORD, x);
19701 }
19702
19703 /* Place some comments into the assembler stream
19704 describing the current function. */
19705 static void
19706 arm_output_function_prologue (FILE *f)
19707 {
19708 unsigned long func_type;
19709
19710 /* Sanity check. */
19711 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19712
19713 func_type = arm_current_func_type ();
19714
19715 switch ((int) ARM_FUNC_TYPE (func_type))
19716 {
19717 default:
19718 case ARM_FT_NORMAL:
19719 break;
19720 case ARM_FT_INTERWORKED:
19721 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19722 break;
19723 case ARM_FT_ISR:
19724 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19725 break;
19726 case ARM_FT_FIQ:
19727 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19728 break;
19729 case ARM_FT_EXCEPTION:
19730 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19731 break;
19732 }
19733
19734 if (IS_NAKED (func_type))
19735 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19736
19737 if (IS_VOLATILE (func_type))
19738 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19739
19740 if (IS_NESTED (func_type))
19741 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19742 if (IS_STACKALIGN (func_type))
19743 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19744 if (IS_CMSE_ENTRY (func_type))
19745 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19746
19747 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19748 crtl->args.size,
19749 crtl->args.pretend_args_size,
19750 (HOST_WIDE_INT) get_frame_size ());
19751
19752 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19753 frame_pointer_needed,
19754 cfun->machine->uses_anonymous_args);
19755
19756 if (cfun->machine->lr_save_eliminated)
19757 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19758
19759 if (crtl->calls_eh_return)
19760 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19761
19762 }
19763
19764 static void
19765 arm_output_function_epilogue (FILE *)
19766 {
19767 arm_stack_offsets *offsets;
19768
19769 if (TARGET_THUMB1)
19770 {
19771 int regno;
19772
19773 /* Emit any call-via-reg trampolines that are needed for v4t support
19774 of call_reg and call_value_reg type insns. */
19775 for (regno = 0; regno < LR_REGNUM; regno++)
19776 {
19777 rtx label = cfun->machine->call_via[regno];
19778
19779 if (label != NULL)
19780 {
19781 switch_to_section (function_section (current_function_decl));
19782 targetm.asm_out.internal_label (asm_out_file, "L",
19783 CODE_LABEL_NUMBER (label));
19784 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19785 }
19786 }
19787
19788 /* ??? Probably not safe to set this here, since it assumes that a
19789 function will be emitted as assembly immediately after we generate
19790 RTL for it. This does not happen for inline functions. */
19791 cfun->machine->return_used_this_function = 0;
19792 }
19793 else /* TARGET_32BIT */
19794 {
19795 /* We need to take into account any stack-frame rounding. */
19796 offsets = arm_get_frame_offsets ();
19797
19798 gcc_assert (!use_return_insn (FALSE, NULL)
19799 || (cfun->machine->return_used_this_function != 0)
19800 || offsets->saved_regs == offsets->outgoing_args
19801 || frame_pointer_needed);
19802 }
19803 }
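/* A sketch of one such v4t call-via-reg trampoline as it might appear
   in the output (the label name is illustrative):

   .L4:
	bx	r3  */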
19804
19805 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19806 STR and STRD. If an even number of registers are being pushed, one
19807 or more STRD patterns are created for each register pair. If an
19808 odd number of registers are pushed, emit an initial STR followed by
19809 as many STRD instructions as are needed. This works best when the
19810 stack is initially 64-bit aligned (the normal case), since it
19811 ensures that each STRD is also 64-bit aligned. */
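/* A sketch, assuming SP starts 64-bit aligned: pushing {r4, r5, r6}
   (an odd count) with the routine below gives something like

	str	r4, [sp, #-12]!		@ allocate all 12 bytes, store r4
	strd	r5, r6, [sp, #4]	@ 64-bit aligned pair store

   The exact addressing forms come from the RTL generated below.  */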
19812 static void
19813 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19814 {
19815 int num_regs = 0;
19816 int i;
19817 int regno;
19818 rtx par = NULL_RTX;
19819 rtx dwarf = NULL_RTX;
19820 rtx tmp;
19821 bool first = true;
19822
19823 num_regs = bit_count (saved_regs_mask);
19824
19825 /* Must be at least one register to save, and can't save SP or PC. */
19826 gcc_assert (num_regs > 0 && num_regs <= 14);
19827 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19828 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19829
19830 /* Create sequence for DWARF info. All the frame-related data for
19831 debugging is held in this wrapper. */
19832 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19833
19834 /* Describe the stack adjustment. */
19835 tmp = gen_rtx_SET (stack_pointer_rtx,
19836 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19837 RTX_FRAME_RELATED_P (tmp) = 1;
19838 XVECEXP (dwarf, 0, 0) = tmp;
19839
19840 /* Find the first register. */
19841 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19842 ;
19843
19844 i = 0;
19845
19846 /* If there's an odd number of registers to push, start off by
19847 pushing a single register. This ensures that subsequent strd
19848 operations are dword aligned (assuming that SP was originally
19849 64-bit aligned). */
19850 if ((num_regs & 1) != 0)
19851 {
19852 rtx reg, mem, insn;
19853
19854 reg = gen_rtx_REG (SImode, regno);
19855 if (num_regs == 1)
19856 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19857 stack_pointer_rtx));
19858 else
19859 mem = gen_frame_mem (Pmode,
19860 gen_rtx_PRE_MODIFY
19861 (Pmode, stack_pointer_rtx,
19862 plus_constant (Pmode, stack_pointer_rtx,
19863 -4 * num_regs)));
19864
19865 tmp = gen_rtx_SET (mem, reg);
19866 RTX_FRAME_RELATED_P (tmp) = 1;
19867 insn = emit_insn (tmp);
19868 RTX_FRAME_RELATED_P (insn) = 1;
19869 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19870 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19871 RTX_FRAME_RELATED_P (tmp) = 1;
19872 i++;
19873 regno++;
19874 XVECEXP (dwarf, 0, i) = tmp;
19875 first = false;
19876 }
19877
19878 while (i < num_regs)
19879 if (saved_regs_mask & (1 << regno))
19880 {
19881 rtx reg1, reg2, mem1, mem2;
19882 rtx tmp0, tmp1, tmp2;
19883 int regno2;
19884
19885 /* Find the register to pair with this one. */
19886 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19887 regno2++)
19888 ;
19889
19890 reg1 = gen_rtx_REG (SImode, regno);
19891 reg2 = gen_rtx_REG (SImode, regno2);
19892
19893 if (first)
19894 {
19895 rtx insn;
19896
19897 first = false;
19898 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19899 stack_pointer_rtx,
19900 -4 * num_regs));
19901 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 -4 * (num_regs - 1)));
19904 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19905 plus_constant (Pmode, stack_pointer_rtx,
19906 -4 * (num_regs)));
19907 tmp1 = gen_rtx_SET (mem1, reg1);
19908 tmp2 = gen_rtx_SET (mem2, reg2);
19909 RTX_FRAME_RELATED_P (tmp0) = 1;
19910 RTX_FRAME_RELATED_P (tmp1) = 1;
19911 RTX_FRAME_RELATED_P (tmp2) = 1;
19912 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19913 XVECEXP (par, 0, 0) = tmp0;
19914 XVECEXP (par, 0, 1) = tmp1;
19915 XVECEXP (par, 0, 2) = tmp2;
19916 insn = emit_insn (par);
19917 RTX_FRAME_RELATED_P (insn) = 1;
19918 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19919 }
19920 else
19921 {
19922 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19923 stack_pointer_rtx,
19924 4 * i));
19925 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19926 stack_pointer_rtx,
19927 4 * (i + 1)));
19928 tmp1 = gen_rtx_SET (mem1, reg1);
19929 tmp2 = gen_rtx_SET (mem2, reg2);
19930 RTX_FRAME_RELATED_P (tmp1) = 1;
19931 RTX_FRAME_RELATED_P (tmp2) = 1;
19932 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19933 XVECEXP (par, 0, 0) = tmp1;
19934 XVECEXP (par, 0, 1) = tmp2;
19935 emit_insn (par);
19936 }
19937
19938 /* Create unwind information. This is an approximation. */
19939 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19940 plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * i)),
19943 reg1);
19944 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19945 plus_constant (Pmode,
19946 stack_pointer_rtx,
19947 4 * (i + 1))),
19948 reg2);
19949
19950 RTX_FRAME_RELATED_P (tmp1) = 1;
19951 RTX_FRAME_RELATED_P (tmp2) = 1;
19952 XVECEXP (dwarf, 0, i + 1) = tmp1;
19953 XVECEXP (dwarf, 0, i + 2) = tmp2;
19954 i += 2;
19955 regno = regno2 + 1;
19956 }
19957 else
19958 regno++;
19959
19960 return;
19961 }
19962
19963 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19964 whenever possible, otherwise it emits single-word stores. The first store
19965 also allocates stack space for all saved registers, using writeback with
19966 post-addressing mode. All other stores use offset addressing. If no STRD
19967 can be emitted, this function emits a sequence of single-word stores,
19968 and not an STM as before, because single-word stores give the scheduler
19969 more freedom and can be turned into an STM by peephole optimizations. */
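/* A sketch: pushing {r4, r5, r7} with this routine gives something like

	strd	r4, r5, [sp, #-12]!	@ first store allocates the space
	str	r7, [sp, #8]

   whereas pushing {r4, r6, r7}, where no pair is available for the
   first store, starts with a single str using writeback instead.  */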
19970 static void
19971 arm_emit_strd_push (unsigned long saved_regs_mask)
19972 {
19973 int num_regs = 0;
19974 int i, j, dwarf_index = 0;
19975 int offset = 0;
19976 rtx dwarf = NULL_RTX;
19977 rtx insn = NULL_RTX;
19978 rtx tmp, mem;
19979
19980 /* TODO: More efficient code could be emitted by changing the
19981 layout, e.g., first push all pairs that can use STRD to keep the
19982 stack aligned, and then push all other registers. */
19983 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19984 if (saved_regs_mask & (1 << i))
19985 num_regs++;
19986
19987 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19988 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19989 gcc_assert (num_regs > 0);
19990
19991 /* Create sequence for DWARF info. */
19992 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19993
19994 /* For dwarf info, we generate explicit stack update. */
19995 tmp = gen_rtx_SET (stack_pointer_rtx,
19996 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19997 RTX_FRAME_RELATED_P (tmp) = 1;
19998 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19999
20000 /* Save registers. */
20001 offset = - 4 * num_regs;
20002 j = 0;
20003 while (j <= LAST_ARM_REGNUM)
20004 if (saved_regs_mask & (1 << j))
20005 {
20006 if ((j % 2 == 0)
20007 && (saved_regs_mask & (1 << (j + 1))))
20008 {
20009 /* The current register and the next register form a register pair
20010 for which STRD can be generated. */
20011 if (offset < 0)
20012 {
20013 /* Allocate stack space for all saved registers. */
20014 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20015 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20016 mem = gen_frame_mem (DImode, tmp);
20017 offset = 0;
20018 }
20019 else if (offset > 0)
20020 mem = gen_frame_mem (DImode,
20021 plus_constant (Pmode,
20022 stack_pointer_rtx,
20023 offset));
20024 else
20025 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20026
20027 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20028 RTX_FRAME_RELATED_P (tmp) = 1;
20029 tmp = emit_insn (tmp);
20030
20031 /* Record the first store insn. */
20032 if (dwarf_index == 1)
20033 insn = tmp;
20034
20035 /* Generate dwarf info. */
20036 mem = gen_frame_mem (SImode,
20037 plus_constant (Pmode,
20038 stack_pointer_rtx,
20039 offset));
20040 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20041 RTX_FRAME_RELATED_P (tmp) = 1;
20042 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20043
20044 mem = gen_frame_mem (SImode,
20045 plus_constant (Pmode,
20046 stack_pointer_rtx,
20047 offset + 4));
20048 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20049 RTX_FRAME_RELATED_P (tmp) = 1;
20050 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20051
20052 offset += 8;
20053 j += 2;
20054 }
20055 else
20056 {
20057 /* Emit a single word store. */
20058 if (offset < 0)
20059 {
20060 /* Allocate stack space for all saved registers. */
20061 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20062 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20063 mem = gen_frame_mem (SImode, tmp);
20064 offset = 0;
20065 }
20066 else if (offset > 0)
20067 mem = gen_frame_mem (SImode,
20068 plus_constant (Pmode,
20069 stack_pointer_rtx,
20070 offset));
20071 else
20072 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20073
20074 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20075 RTX_FRAME_RELATED_P (tmp) = 1;
20076 tmp = emit_insn (tmp);
20077
20078 /* Record the first store insn. */
20079 if (dwarf_index == 1)
20080 insn = tmp;
20081
20082 /* Generate dwarf info. */
20083 mem = gen_frame_mem (SImode,
20084 plus_constant (Pmode,
20085 stack_pointer_rtx,
20086 offset));
20087 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20088 RTX_FRAME_RELATED_P (tmp) = 1;
20089 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20090
20091 offset += 4;
20092 j += 1;
20093 }
20094 }
20095 else
20096 j++;
20097
20098 /* Attach dwarf info to the first insn we generate. */
20099 gcc_assert (insn != NULL_RTX);
20100 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20101 RTX_FRAME_RELATED_P (insn) = 1;
20102 }
20103
20104 /* Generate and emit an insn that we will recognize as a push_multi.
20105 Unfortunately, since this insn does not reflect very well the actual
20106 semantics of the operation, we need to annotate the insn for the benefit
20107 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20108 MASK for registers that should be annotated for DWARF2 frame unwind
20109 information. */
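/* At the assembly level the insn created here is just a multi-register
   store, e.g. "push {r4, r5, lr}" (or "stmfd sp!, {r4, r5, lr}" in
   pre-UAL syntax); the interesting part is the DWARF annotation
   described in the comment further down.  */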
20110 static rtx
20111 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20112 {
20113 int num_regs = 0;
20114 int num_dwarf_regs = 0;
20115 int i, j;
20116 rtx par;
20117 rtx dwarf;
20118 int dwarf_par_index;
20119 rtx tmp, reg;
20120
20121 /* We don't record the PC in the dwarf frame information. */
20122 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20123
20124 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20125 {
20126 if (mask & (1 << i))
20127 num_regs++;
20128 if (dwarf_regs_mask & (1 << i))
20129 num_dwarf_regs++;
20130 }
20131
20132 gcc_assert (num_regs && num_regs <= 16);
20133 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20134
20135 /* For the body of the insn we are going to generate an UNSPEC in
20136 parallel with several USEs. This allows the insn to be recognized
20137 by the push_multi pattern in the arm.md file.
20138
20139 The body of the insn looks something like this:
20140
20141 (parallel [
20142 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20143 (const_int:SI <num>)))
20144 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20145 (use (reg:SI XX))
20146 (use (reg:SI YY))
20147 ...
20148 ])
20149
20150 For the frame note however, we try to be more explicit and actually
20151 show each register being stored into the stack frame, plus a (single)
20152 decrement of the stack pointer. We do it this way in order to be
20153 friendly to the stack unwinding code, which only wants to see a single
20154 stack decrement per instruction. The RTL we generate for the note looks
20155 something like this:
20156
20157 (sequence [
20158 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20159 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20160 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20161 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20162 ...
20163 ])
20164
20165 FIXME: In an ideal world the PRE_MODIFY would not exist and
20166 instead we'd have a parallel expression detailing all
20167 the stores to the various memory addresses so that debug
20168 information is more up-to-date. Remember however while writing
20169 this to take care of the constraints with the push instruction.
20170
20171 Note also that this has to be taken care of for the VFP registers.
20172
20173 For more see PR43399. */
20174
20175 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20176 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20177 dwarf_par_index = 1;
20178
20179 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20180 {
20181 if (mask & (1 << i))
20182 {
20183 reg = gen_rtx_REG (SImode, i);
20184
20185 XVECEXP (par, 0, 0)
20186 = gen_rtx_SET (gen_frame_mem
20187 (BLKmode,
20188 gen_rtx_PRE_MODIFY (Pmode,
20189 stack_pointer_rtx,
20190 plus_constant
20191 (Pmode, stack_pointer_rtx,
20192 -4 * num_regs))
20193 ),
20194 gen_rtx_UNSPEC (BLKmode,
20195 gen_rtvec (1, reg),
20196 UNSPEC_PUSH_MULT));
20197
20198 if (dwarf_regs_mask & (1 << i))
20199 {
20200 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20201 reg);
20202 RTX_FRAME_RELATED_P (tmp) = 1;
20203 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20204 }
20205
20206 break;
20207 }
20208 }
20209
20210 for (j = 1, i++; j < num_regs; i++)
20211 {
20212 if (mask & (1 << i))
20213 {
20214 reg = gen_rtx_REG (SImode, i);
20215
20216 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20217
20218 if (dwarf_regs_mask & (1 << i))
20219 {
20220 tmp
20221 = gen_rtx_SET (gen_frame_mem
20222 (SImode,
20223 plus_constant (Pmode, stack_pointer_rtx,
20224 4 * j)),
20225 reg);
20226 RTX_FRAME_RELATED_P (tmp) = 1;
20227 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20228 }
20229
20230 j++;
20231 }
20232 }
20233
20234 par = emit_insn (par);
20235
20236 tmp = gen_rtx_SET (stack_pointer_rtx,
20237 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20238 RTX_FRAME_RELATED_P (tmp) = 1;
20239 XVECEXP (dwarf, 0, 0) = tmp;
20240
20241 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20242
20243 return par;
20244 }
20245
20246 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20247 SIZE is the offset to be adjusted.
20248 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20249 static void
20250 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20251 {
20252 rtx dwarf;
20253
20254 RTX_FRAME_RELATED_P (insn) = 1;
20255 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20256 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20257 }
20258
20259 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20260 SAVED_REGS_MASK shows which registers need to be restored.
20261
20262 Unfortunately, since this insn does not reflect very well the actual
20263 semantics of the operation, we need to annotate the insn for the benefit
20264 of DWARF2 frame unwind information. */
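/* At the assembly level this is a plain multi-register load such as
   "pop {r4, r5, pc}"; the extra work here is attaching REG_CFA_RESTORE
   notes and the CFA adjustment so that the unwinder tracks the stack
   pointer correctly.  */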
20265 static void
20266 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20267 {
20268 int num_regs = 0;
20269 int i, j;
20270 rtx par;
20271 rtx dwarf = NULL_RTX;
20272 rtx tmp, reg;
20273 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20274 int offset_adj;
20275 int emit_update;
20276
20277 offset_adj = return_in_pc ? 1 : 0;
20278 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20279 if (saved_regs_mask & (1 << i))
20280 num_regs++;
20281
20282 gcc_assert (num_regs && num_regs <= 16);
20283
20284 /* If SP is in reglist, then we don't emit SP update insn. */
20285 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20286
20287 /* The parallel needs to hold num_regs SETs
20288 and one SET for the stack update. */
20289 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20290
20291 if (return_in_pc)
20292 XVECEXP (par, 0, 0) = ret_rtx;
20293
20294 if (emit_update)
20295 {
20296 /* Increment the stack pointer, based on there being
20297 num_regs 4-byte registers to restore. */
20298 tmp = gen_rtx_SET (stack_pointer_rtx,
20299 plus_constant (Pmode,
20300 stack_pointer_rtx,
20301 4 * num_regs));
20302 RTX_FRAME_RELATED_P (tmp) = 1;
20303 XVECEXP (par, 0, offset_adj) = tmp;
20304 }
20305
20306 /* Now restore every reg, which may include PC. */
20307 for (j = 0, i = 0; j < num_regs; i++)
20308 if (saved_regs_mask & (1 << i))
20309 {
20310 reg = gen_rtx_REG (SImode, i);
20311 if ((num_regs == 1) && emit_update && !return_in_pc)
20312 {
20313 /* Emit single load with writeback. */
20314 tmp = gen_frame_mem (SImode,
20315 gen_rtx_POST_INC (Pmode,
20316 stack_pointer_rtx));
20317 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20318 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20319 return;
20320 }
20321
20322 tmp = gen_rtx_SET (reg,
20323 gen_frame_mem
20324 (SImode,
20325 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20326 RTX_FRAME_RELATED_P (tmp) = 1;
20327 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20328
20329 /* We need to maintain a sequence for DWARF info too. As dwarf info
20330 should not have PC, skip PC. */
20331 if (i != PC_REGNUM)
20332 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20333
20334 j++;
20335 }
20336
20337 if (return_in_pc)
20338 par = emit_jump_insn (par);
20339 else
20340 par = emit_insn (par);
20341
20342 REG_NOTES (par) = dwarf;
20343 if (!return_in_pc)
20344 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20345 stack_pointer_rtx, stack_pointer_rtx);
20346 }
20347
20348 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20349 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20350
20351 Unfortunately, since this insn does not reflect very well the actual
20352 semantics of the operation, we need to annotate the insn for the benefit
20353 of DWARF2 frame unwind information. */
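/* For example, restoring d8-d9 from BASE_REG == SP would be emitted as
   something like "vldm sp!, {d8-d9}", together with REG_CFA_RESTORE
   notes for both registers and a 16-byte CFA adjustment (two 8-byte
   registers).  */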
20354 static void
20355 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20356 {
20357 int i, j;
20358 rtx par;
20359 rtx dwarf = NULL_RTX;
20360 rtx tmp, reg;
20361
20362 gcc_assert (num_regs && num_regs <= 32);
20363
20364 /* Workaround ARM10 VFPr1 bug. */
20365 if (num_regs == 2 && !arm_arch6)
20366 {
20367 if (first_reg == 15)
20368 first_reg--;
20369
20370 num_regs++;
20371 }
20372
20373 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20374 there could be up to 32 D-registers to restore.
20375 If there are more than 16 D-registers, make two recursive calls,
20376 each of which emits one pop_multi instruction. */
20377 if (num_regs > 16)
20378 {
20379 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20380 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20381 return;
20382 }
20383
20384 /* The parallel needs to hold num_regs SETs
20385 and one SET for the stack update. */
20386 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20387
20388 /* Increment the stack pointer, based on there being
20389 num_regs 8-byte registers to restore. */
20390 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20391 RTX_FRAME_RELATED_P (tmp) = 1;
20392 XVECEXP (par, 0, 0) = tmp;
20393
20394 /* Now show every reg that will be restored, using a SET for each. */
20395 for (j = 0, i=first_reg; j < num_regs; i += 2)
20396 {
20397 reg = gen_rtx_REG (DFmode, i);
20398
20399 tmp = gen_rtx_SET (reg,
20400 gen_frame_mem
20401 (DFmode,
20402 plus_constant (Pmode, base_reg, 8 * j)));
20403 RTX_FRAME_RELATED_P (tmp) = 1;
20404 XVECEXP (par, 0, j + 1) = tmp;
20405
20406 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20407
20408 j++;
20409 }
20410
20411 par = emit_insn (par);
20412 REG_NOTES (par) = dwarf;
20413
20414 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20415 if (REGNO (base_reg) == IP_REGNUM)
20416 {
20417 RTX_FRAME_RELATED_P (par) = 1;
20418 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20419 }
20420 else
20421 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20422 base_reg, base_reg);
20423 }
20424
20425 /* Generate and emit a pattern that will be recognized as an LDRD pattern.
20426 If an even number of registers is being popped, multiple LDRD patterns are
20427 created, one for each register pair. If an odd number of registers is
20428 popped, the last register is loaded using an LDR pattern. */
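/* A sketch of the sequence this produces when popping {r4, r5, r6}
   (an odd count, no PC):

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4  */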
20429 static void
20430 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20431 {
20432 int num_regs = 0;
20433 int i, j;
20434 rtx par = NULL_RTX;
20435 rtx dwarf = NULL_RTX;
20436 rtx tmp, reg, tmp1;
20437 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20438
20439 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20440 if (saved_regs_mask & (1 << i))
20441 num_regs++;
20442
20443 gcc_assert (num_regs && num_regs <= 16);
20444
20445 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20446 to be popped. If num_regs was even it now becomes odd, and we can
20447 generate the pop with PC; if it was odd it becomes even, and an LDR
20448 with return can be generated for PC. */
20449 if (return_in_pc)
20450 num_regs--;
20451
20452 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20453
20454 /* Var j iterates over all the registers in saved_regs_mask, while var i
20455 gives the index of each saved register in the stack frame.
20456 A PARALLEL RTX holding a register pair is created here, so that the
20457 LDRD pattern can be matched. As PC is always the last register to be
20458 popped, and we have already decremented num_regs if PC is present, we
20459 don't have to worry about PC in this loop. */
20460 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20461 if (saved_regs_mask & (1 << j))
20462 {
20463 /* Create RTX for memory load. */
20464 reg = gen_rtx_REG (SImode, j);
20465 tmp = gen_rtx_SET (reg,
20466 gen_frame_mem (SImode,
20467 plus_constant (Pmode,
20468 stack_pointer_rtx, 4 * i)));
20469 RTX_FRAME_RELATED_P (tmp) = 1;
20470
20471 if (i % 2 == 0)
20472 {
20473 /* When saved-register index (i) is even, the RTX to be emitted is
20474 yet to be created. Hence create it first. The LDRD pattern we
20475 are generating is :
20476 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20477 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20478 where target registers need not be consecutive. */
20479 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20480 dwarf = NULL_RTX;
20481 }
20482
20483 /* The ith register is added to the PARALLEL RTX: if i is even it becomes
20484 the 0th element, and if i is odd it becomes the 1st element of the
20485 LDRD pattern shown above. */
20486 XVECEXP (par, 0, (i % 2)) = tmp;
20487 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20488
20489 if ((i % 2) == 1)
20490 {
20491 /* When saved-register index (i) is odd, RTXs for both the registers
20492 to be loaded are generated in above given LDRD pattern, and the
20493 pattern can be emitted now. */
20494 par = emit_insn (par);
20495 REG_NOTES (par) = dwarf;
20496 RTX_FRAME_RELATED_P (par) = 1;
20497 }
20498
20499 i++;
20500 }
20501
20502 /* If the number of registers popped is odd and return_in_pc is false, or
20503 the number of registers is even and return_in_pc is true, the last
20504 register is popped using LDR. It can be PC as well. Hence, adjust the
20505 stack first and then use LDR with post-increment. */
20506
20507 /* Increment the stack pointer past the registers already restored by
20508 the LDRD pairs above (i of them, 4 bytes each). */
20509 tmp = gen_rtx_SET (stack_pointer_rtx,
20510 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20511 RTX_FRAME_RELATED_P (tmp) = 1;
20512 tmp = emit_insn (tmp);
20513 if (!return_in_pc)
20514 {
20515 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20516 stack_pointer_rtx, stack_pointer_rtx);
20517 }
20518
20519 dwarf = NULL_RTX;
20520
20521 if (((num_regs % 2) == 1 && !return_in_pc)
20522 || ((num_regs % 2) == 0 && return_in_pc))
20523 {
20524 /* Scan for the single register to be popped. Skip until the saved
20525 register is found. */
20526 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20527
20528 /* Gen LDR with post increment here. */
20529 tmp1 = gen_rtx_MEM (SImode,
20530 gen_rtx_POST_INC (SImode,
20531 stack_pointer_rtx));
20532 set_mem_alias_set (tmp1, get_frame_alias_set ());
20533
20534 reg = gen_rtx_REG (SImode, j);
20535 tmp = gen_rtx_SET (reg, tmp1);
20536 RTX_FRAME_RELATED_P (tmp) = 1;
20537 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20538
20539 if (return_in_pc)
20540 {
20541 /* If return_in_pc, j must be PC_REGNUM. */
20542 gcc_assert (j == PC_REGNUM);
20543 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20544 XVECEXP (par, 0, 0) = ret_rtx;
20545 XVECEXP (par, 0, 1) = tmp;
20546 par = emit_jump_insn (par);
20547 }
20548 else
20549 {
20550 par = emit_insn (tmp);
20551 REG_NOTES (par) = dwarf;
20552 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20553 stack_pointer_rtx, stack_pointer_rtx);
20554 }
20555
20556 }
20557 else if ((num_regs % 2) == 1 && return_in_pc)
20558 {
20559 /* There are 2 registers to be popped. So, generate the pattern
20560 pop_multiple_with_stack_update_and_return to pop in PC. */
20561 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20562 }
20563
20564 return;
20565 }
20566
20567 /* LDRD in ARM mode needs consecutive registers as operands. This function
20568 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20569 offset addressing and then generates one separate stack update. This provides
20570 more scheduling freedom, compared to writeback on every load. However,
20571 if the function returns using load into PC directly
20572 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20573 before the last load. TODO: Add a peephole optimization to recognize
20574 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20575 peephole optimization to merge the load at stack-offset zero
20576 with the stack update instruction using load with writeback
20577 in post-index addressing mode. */
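/* A sketch: popping {r4, r5, r6} with this routine gives roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   i.e. offset addressing for the loads and a single stack update at
   the end, as described above.  */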
20578 static void
20579 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20580 {
20581 int j = 0;
20582 int offset = 0;
20583 rtx par = NULL_RTX;
20584 rtx dwarf = NULL_RTX;
20585 rtx tmp, mem;
20586
20587 /* Restore saved registers. */
20588 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20589 j = 0;
20590 while (j <= LAST_ARM_REGNUM)
20591 if (saved_regs_mask & (1 << j))
20592 {
20593 if ((j % 2) == 0
20594 && (saved_regs_mask & (1 << (j + 1)))
20595 && (j + 1) != PC_REGNUM)
20596 {
20597 /* Current register and next register form register pair for which
20598 LDRD can be generated. PC is always the last register popped, and
20599 we handle it separately. */
20600 if (offset > 0)
20601 mem = gen_frame_mem (DImode,
20602 plus_constant (Pmode,
20603 stack_pointer_rtx,
20604 offset));
20605 else
20606 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20607
20608 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20609 tmp = emit_insn (tmp);
20610 RTX_FRAME_RELATED_P (tmp) = 1;
20611
20612 /* Generate dwarf info. */
20613
20614 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20615 gen_rtx_REG (SImode, j),
20616 NULL_RTX);
20617 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20618 gen_rtx_REG (SImode, j + 1),
20619 dwarf);
20620
20621 REG_NOTES (tmp) = dwarf;
20622
20623 offset += 8;
20624 j += 2;
20625 }
20626 else if (j != PC_REGNUM)
20627 {
20628 /* Emit a single word load. */
20629 if (offset > 0)
20630 mem = gen_frame_mem (SImode,
20631 plus_constant (Pmode,
20632 stack_pointer_rtx,
20633 offset));
20634 else
20635 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20636
20637 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20638 tmp = emit_insn (tmp);
20639 RTX_FRAME_RELATED_P (tmp) = 1;
20640
20641 /* Generate dwarf info. */
20642 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20643 gen_rtx_REG (SImode, j),
20644 NULL_RTX);
20645
20646 offset += 4;
20647 j += 1;
20648 }
20649 else /* j == PC_REGNUM */
20650 j++;
20651 }
20652 else
20653 j++;
20654
20655 /* Update the stack. */
20656 if (offset > 0)
20657 {
20658 tmp = gen_rtx_SET (stack_pointer_rtx,
20659 plus_constant (Pmode,
20660 stack_pointer_rtx,
20661 offset));
20662 tmp = emit_insn (tmp);
20663 arm_add_cfa_adjust_cfa_note (tmp, offset,
20664 stack_pointer_rtx, stack_pointer_rtx);
20665 offset = 0;
20666 }
20667
20668 if (saved_regs_mask & (1 << PC_REGNUM))
20669 {
20670 /* Only PC is to be popped. */
20671 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20672 XVECEXP (par, 0, 0) = ret_rtx;
20673 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20674 gen_frame_mem (SImode,
20675 gen_rtx_POST_INC (SImode,
20676 stack_pointer_rtx)));
20677 RTX_FRAME_RELATED_P (tmp) = 1;
20678 XVECEXP (par, 0, 1) = tmp;
20679 par = emit_jump_insn (par);
20680
20681 /* Generate dwarf info. */
20682 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20683 gen_rtx_REG (SImode, PC_REGNUM),
20684 NULL_RTX);
20685 REG_NOTES (par) = dwarf;
20686 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20687 stack_pointer_rtx, stack_pointer_rtx);
20688 }
20689 }
20690
20691 /* Calculate the size of the return value that is passed in registers. */
20692 static unsigned
20693 arm_size_return_regs (void)
20694 {
20695 machine_mode mode;
20696
20697 if (crtl->return_rtx != 0)
20698 mode = GET_MODE (crtl->return_rtx);
20699 else
20700 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20701
20702 return GET_MODE_SIZE (mode);
20703 }
20704
20705 /* Return true if the current function needs to save/restore LR. */
20706 static bool
20707 thumb_force_lr_save (void)
20708 {
20709 return !cfun->machine->lr_save_eliminated
20710 && (!crtl->is_leaf
20711 || thumb_far_jump_used_p ()
20712 || df_regs_ever_live_p (LR_REGNUM));
20713 }
20714
20715 /* Return true if CALL is an indirect tail call, in which case we
20716 cannot tell whether r3 will be available to it at the call
20717 site. */
20718 static bool
20719 is_indirect_tailcall_p (rtx call)
20720 {
20721 rtx pat = PATTERN (call);
20722
20723 /* Indirect tail call. */
20724 pat = XVECEXP (pat, 0, 0);
20725 if (GET_CODE (pat) == SET)
20726 pat = SET_SRC (pat);
20727
20728 pat = XEXP (XEXP (pat, 0), 0);
20729 return REG_P (pat);
20730 }
20731
20732 /* Return true if r3 is used by any of the tail call insns in the
20733 current function. */
20734 static bool
20735 any_sibcall_could_use_r3 (void)
20736 {
20737 edge_iterator ei;
20738 edge e;
20739
20740 if (!crtl->tail_call_emit)
20741 return false;
20742 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20743 if (e->flags & EDGE_SIBCALL)
20744 {
20745 rtx_insn *call = BB_END (e->src);
20746 if (!CALL_P (call))
20747 call = prev_nonnote_nondebug_insn (call);
20748 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20749 if (find_regno_fusage (call, USE, 3)
20750 || is_indirect_tailcall_p (call))
20751 return true;
20752 }
20753 return false;
20754 }
20755
20756
20757 /* Compute the distance from register FROM to register TO.
20758 These can be the arg pointer (26), the soft frame pointer (25),
20759 the stack pointer (13) or the hard frame pointer (11).
20760 In thumb mode r7 is used as the soft frame pointer, if needed.
20761 Typical stack layout looks like this:
20762
20763 old stack pointer -> | |
20764 ----
20765 | | \
20766 | | saved arguments for
20767 | | vararg functions
20768 | | /
20769 --
20770 hard FP & arg pointer -> | | \
20771 | | stack
20772 | | frame
20773 | | /
20774 --
20775 | | \
20776 | | call saved
20777 | | registers
20778 soft frame pointer -> | | /
20779 --
20780 | | \
20781 | | local
20782 | | variables
20783 locals base pointer -> | | /
20784 --
20785 | | \
20786 | | outgoing
20787 | | arguments
20788 current stack pointer -> | | /
20789 --
20790
20791 For a given function some or all of these stack components
20792 may not be needed, giving rise to the possibility of
20793 eliminating some of the registers.
20794
20795 The values returned by this function must reflect the behavior
20796 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20797
20798 The sign of the number returned reflects the direction of stack
20799 growth, so the values are positive for all eliminations except
20800 from the soft frame pointer to the hard frame pointer.
20801
20802 SFP may point just inside the local variables block to ensure correct
20803 alignment. */
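/* A worked example, assuming no static chain slot, no caller
   interworking slot, no coprocessor registers saved and no frame
   pointer: an ARM-mode function that saves {r4, r5, r6, lr}, has 8
   bytes of locals and no outgoing arguments gets saved_args = 0,
   saved_regs = 16, soft_frame = 16, locals_base = 24 and
   outgoing_args = 24, so the ARG_POINTER to STACK_POINTER elimination
   offset computed further below is 24 - (0 + 4) = 20.  */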
20804
20805
20806 /* Return cached stack offsets. */
20807
20808 static arm_stack_offsets *
20809 arm_get_frame_offsets (void)
20810 {
20811 struct arm_stack_offsets *offsets;
20812
20813 offsets = &cfun->machine->stack_offsets;
20814
20815 return offsets;
20816 }
20817
20818
20819 /* Calculate stack offsets. These are used to calculate register elimination
20820 offsets and in prologue/epilogue code. Also calculates which registers
20821 should be saved. */
20822
20823 static void
20824 arm_compute_frame_layout (void)
20825 {
20826 struct arm_stack_offsets *offsets;
20827 unsigned long func_type;
20828 int saved;
20829 int core_saved;
20830 HOST_WIDE_INT frame_size;
20831 int i;
20832
20833 offsets = &cfun->machine->stack_offsets;
20834
20835 /* Initially this is the size of the local variables. It will be translated
20836 into an offset once we have determined the size of preceding data. */
20837 frame_size = ROUND_UP_WORD (get_frame_size ());
20838
20839 /* Space for variadic functions. */
20840 offsets->saved_args = crtl->args.pretend_args_size;
20841
20842 /* In Thumb mode this is incorrect, but never used. */
20843 offsets->frame
20844 = (offsets->saved_args
20845 + arm_compute_static_chain_stack_bytes ()
20846 + (frame_pointer_needed ? 4 : 0));
20847
20848 if (TARGET_32BIT)
20849 {
20850 unsigned int regno;
20851
20852 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20853 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20854 saved = core_saved;
20855
20856 /* We know that SP will be doubleword aligned on entry, and we must
20857 preserve that condition at any subroutine call. We also require the
20858 soft frame pointer to be doubleword aligned. */
20859
20860 if (TARGET_REALLY_IWMMXT)
20861 {
20862 /* Check for the call-saved iWMMXt registers. */
20863 for (regno = FIRST_IWMMXT_REGNUM;
20864 regno <= LAST_IWMMXT_REGNUM;
20865 regno++)
20866 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20867 saved += 8;
20868 }
20869
20870 func_type = arm_current_func_type ();
20871 /* Space for saved VFP registers. */
20872 if (! IS_VOLATILE (func_type)
20873 && TARGET_HARD_FLOAT)
20874 saved += arm_get_vfp_saved_size ();
20875 }
20876 else /* TARGET_THUMB1 */
20877 {
20878 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20879 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20880 saved = core_saved;
20881 if (TARGET_BACKTRACE)
20882 saved += 16;
20883 }
20884
20885 /* Saved registers include the stack frame. */
20886 offsets->saved_regs
20887 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20888 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20889
20890 /* A leaf function does not need any stack alignment if it has nothing
20891 on the stack. */
20892 if (crtl->is_leaf && frame_size == 0
20893 /* However if it calls alloca(), we have a dynamically allocated
20894 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20895 && ! cfun->calls_alloca)
20896 {
20897 offsets->outgoing_args = offsets->soft_frame;
20898 offsets->locals_base = offsets->soft_frame;
20899 return;
20900 }
20901
20902 /* Ensure SFP has the correct alignment. */
20903 if (ARM_DOUBLEWORD_ALIGN
20904 && (offsets->soft_frame & 7))
20905 {
20906 offsets->soft_frame += 4;
20907 /* Try to align stack by pushing an extra reg. Don't bother doing this
20908 when there is a stack frame as the alignment will be rolled into
20909 the normal stack adjustment. */
20910 if (frame_size + crtl->outgoing_args_size == 0)
20911 {
20912 int reg = -1;
20913
20914 /* Register r3 is caller-saved. Normally it does not need to be
20915 saved on entry by the prologue. However if we choose to save
20916 it for padding then we may confuse the compiler into thinking
20917 a prologue sequence is required when in fact it is not. This
20918 will occur when shrink-wrapping if r3 is used as a scratch
20919 register and there are no other callee-saved writes.
20920
20921 This situation can be avoided when other callee-saved registers
20922 are available and r3 is not mandatory if we choose a callee-saved
20923 register for padding. */
20924 bool prefer_callee_reg_p = false;
20925
20926 /* If it is safe to use r3, then do so. This sometimes
20927 generates better code on Thumb-2 by avoiding the need to
20928 use 32-bit push/pop instructions. */
20929 if (! any_sibcall_could_use_r3 ()
20930 && arm_size_return_regs () <= 12
20931 && (offsets->saved_regs_mask & (1 << 3)) == 0
20932 && (TARGET_THUMB2
20933 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20934 {
20935 reg = 3;
20936 if (!TARGET_THUMB2)
20937 prefer_callee_reg_p = true;
20938 }
20939 if (reg == -1
20940 || prefer_callee_reg_p)
20941 {
20942 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20943 {
20944 /* Avoid fixed registers; they may be changed at
20945 arbitrary times so it's unsafe to restore them
20946 during the epilogue. */
20947 if (!fixed_regs[i]
20948 && (offsets->saved_regs_mask & (1 << i)) == 0)
20949 {
20950 reg = i;
20951 break;
20952 }
20953 }
20954 }
20955
20956 if (reg != -1)
20957 {
20958 offsets->saved_regs += 4;
20959 offsets->saved_regs_mask |= (1 << reg);
20960 }
20961 }
20962 }
20963
20964 offsets->locals_base = offsets->soft_frame + frame_size;
20965 offsets->outgoing_args = (offsets->locals_base
20966 + crtl->outgoing_args_size);
20967
20968 if (ARM_DOUBLEWORD_ALIGN)
20969 {
20970 /* Ensure SP remains doubleword aligned. */
20971 if (offsets->outgoing_args & 7)
20972 offsets->outgoing_args += 4;
20973 gcc_assert (!(offsets->outgoing_args & 7));
20974 }
20975 }
20976
20977
20978 /* Calculate the relative offsets for the different stack pointers. Positive
20979 offsets are in the direction of stack growth. */
20980
20981 HOST_WIDE_INT
20982 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20983 {
20984 arm_stack_offsets *offsets;
20985
20986 offsets = arm_get_frame_offsets ();
20987
20988 /* OK, now we have enough information to compute the distances.
20989 There must be an entry in these switch tables for each pair
20990 of registers in ELIMINABLE_REGS, even if some of the entries
20991 seem to be redundant or useless. */
20992 switch (from)
20993 {
20994 case ARG_POINTER_REGNUM:
20995 switch (to)
20996 {
20997 case THUMB_HARD_FRAME_POINTER_REGNUM:
20998 return 0;
20999
21000 case FRAME_POINTER_REGNUM:
21001 /* This is the reverse of the soft frame pointer
21002 to hard frame pointer elimination below. */
21003 return offsets->soft_frame - offsets->saved_args;
21004
21005 case ARM_HARD_FRAME_POINTER_REGNUM:
21006 /* This is only non-zero in the case where the static chain register
21007 is stored above the frame. */
21008 return offsets->frame - offsets->saved_args - 4;
21009
21010 case STACK_POINTER_REGNUM:
21011 /* If nothing has been pushed on the stack at all
21012 then this will return -4. This *is* correct! */
21013 return offsets->outgoing_args - (offsets->saved_args + 4);
21014
21015 default:
21016 gcc_unreachable ();
21017 }
21018 gcc_unreachable ();
21019
21020 case FRAME_POINTER_REGNUM:
21021 switch (to)
21022 {
21023 case THUMB_HARD_FRAME_POINTER_REGNUM:
21024 return 0;
21025
21026 case ARM_HARD_FRAME_POINTER_REGNUM:
21027 /* The hard frame pointer points to the top entry in the
21028 stack frame. The soft frame pointer to the bottom entry
21029 in the stack frame. If there is no stack frame at all,
21030 then they are identical. */
21031
21032 return offsets->frame - offsets->soft_frame;
21033
21034 case STACK_POINTER_REGNUM:
21035 return offsets->outgoing_args - offsets->soft_frame;
21036
21037 default:
21038 gcc_unreachable ();
21039 }
21040 gcc_unreachable ();
21041
21042 default:
21043 /* You cannot eliminate from the stack pointer.
21044 In theory you could eliminate from the hard frame
21045 pointer to the stack pointer, but this will never
21046 happen, since if a stack frame is not needed the
21047 hard frame pointer will never be used. */
21048 gcc_unreachable ();
21049 }
21050 }
21051
21052 /* Given FROM and TO register numbers, say whether this elimination is
21053 allowed. Frame pointer elimination is automatically handled.
21054
21055 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21056 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21057 pointer, we must eliminate FRAME_POINTER_REGNUM into
21058 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21059 ARG_POINTER_REGNUM. */
21060
21061 bool
21062 arm_can_eliminate (const int from, const int to)
21063 {
21064 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21065 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21066 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21067 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21068 true);
21069 }
21070
21071 /* Emit RTL to save coprocessor registers on function entry. Returns the
21072 number of bytes pushed. */
21073
21074 static int
21075 arm_save_coproc_regs(void)
21076 {
21077 int saved_size = 0;
21078 unsigned reg;
21079 unsigned start_reg;
21080 rtx insn;
21081
21082 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21083 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21084 {
21085 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21086 insn = gen_rtx_MEM (V2SImode, insn);
21087 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21088 RTX_FRAME_RELATED_P (insn) = 1;
21089 saved_size += 8;
21090 }
21091
21092 if (TARGET_HARD_FLOAT)
21093 {
21094 start_reg = FIRST_VFP_REGNUM;
21095
21096 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21097 {
21098 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21099 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21100 {
21101 if (start_reg != reg)
21102 saved_size += vfp_emit_fstmd (start_reg,
21103 (reg - start_reg) / 2);
21104 start_reg = reg + 2;
21105 }
21106 }
21107 if (start_reg != reg)
21108 saved_size += vfp_emit_fstmd (start_reg,
21109 (reg - start_reg) / 2);
21110 }
21111 return saved_size;
21112 }
21113
21114
21115 /* Set the Thumb frame pointer from the stack pointer. */
21116
21117 static void
21118 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21119 {
21120 HOST_WIDE_INT amount;
21121 rtx insn, dwarf;
21122
21123 amount = offsets->outgoing_args - offsets->locals_base;
21124 if (amount < 1024)
21125 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21126 stack_pointer_rtx, GEN_INT (amount)));
21127 else
21128 {
21129 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21130 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21131 expects the first two operands to be the same. */
21132 if (TARGET_THUMB2)
21133 {
21134 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21135 stack_pointer_rtx,
21136 hard_frame_pointer_rtx));
21137 }
21138 else
21139 {
21140 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21141 hard_frame_pointer_rtx,
21142 stack_pointer_rtx));
21143 }
21144 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21145 plus_constant (Pmode, stack_pointer_rtx, amount));
21146 RTX_FRAME_RELATED_P (dwarf) = 1;
21147 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21148 }
21149
21150 RTX_FRAME_RELATED_P (insn) = 1;
21151 }
21152
21153 struct scratch_reg {
21154 rtx reg;
21155 bool saved;
21156 };
21157
21158 /* Return a short-lived scratch register for use as a 2nd scratch register on
21159 function entry after the registers are saved in the prologue. This register
21160 must be released by means of release_scratch_register_on_entry. IP is not
21161 considered since it is always used as the 1st scratch register if available.
21162
21163 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21164 mask of live registers. */
21165
21166 static void
21167 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21168 unsigned long live_regs)
21169 {
21170 int regno = -1;
21171
21172 sr->saved = false;
21173
21174 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21175 regno = LR_REGNUM;
21176 else
21177 {
21178 unsigned int i;
21179
21180 for (i = 4; i < 11; i++)
21181 if (regno1 != i && (live_regs & (1 << i)) != 0)
21182 {
21183 regno = i;
21184 break;
21185 }
21186
21187 if (regno < 0)
21188 {
21189 /* If IP is used as the 1st scratch register for a nested function,
21190 then either r3 wasn't available or is used to preserve IP. */
21191 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21192 regno1 = 3;
21193 regno = (regno1 == 3 ? 2 : 3);
21194 sr->saved
21195 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21196 regno);
21197 }
21198 }
21199
21200 sr->reg = gen_rtx_REG (SImode, regno);
21201 if (sr->saved)
21202 {
21203 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21204 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21205 rtx x = gen_rtx_SET (stack_pointer_rtx,
21206 plus_constant (Pmode, stack_pointer_rtx, -4));
21207 RTX_FRAME_RELATED_P (insn) = 1;
21208 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21209 }
21210 }
21211
21212 /* Release a scratch register obtained from the preceding function. */
21213
21214 static void
21215 release_scratch_register_on_entry (struct scratch_reg *sr)
21216 {
21217 if (sr->saved)
21218 {
21219 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21220 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21221 rtx x = gen_rtx_SET (stack_pointer_rtx,
21222 plus_constant (Pmode, stack_pointer_rtx, 4));
21223 RTX_FRAME_RELATED_P (insn) = 1;
21224 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21225 }
21226 }
21227
21228 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21229
21230 #if PROBE_INTERVAL > 4096
21231 #error Cannot use indexed addressing mode for stack probing
21232 #endif
21233
21234 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21235 inclusive. These are offsets from the current stack pointer. REGNO1
21236 is the index number of the 1st scratch register and LIVE_REGS is the
21237 mask of live registers. */
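/* A worked example with PROBE_INTERVAL == 4096 (i.e.
   STACK_CHECK_PROBE_INTERVAL_EXP == 12): for SIZE == 8704, which lies
   between 2 and 5 intervals, the code below emits probes at
   FIRST + 4096, FIRST + 8192 and finally FIRST + 8704, matching the
   "probe every interval, then probe at FIRST + SIZE" scheme described
   in the comments inside the function.  */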
21238
21239 static void
21240 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21241 unsigned int regno1, unsigned long live_regs)
21242 {
21243 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21244
21245 /* See if we have a constant small number of probes to generate. If so,
21246 that's the easy case. */
21247 if (size <= PROBE_INTERVAL)
21248 {
21249 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21250 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21251 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21252 }
21253
21254 /* The run-time loop is made up of 10 insns in the generic case while the
21255 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
21256 else if (size <= 5 * PROBE_INTERVAL)
21257 {
21258 HOST_WIDE_INT i, rem;
21259
21260 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21261 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21262 emit_stack_probe (reg1);
21263
21264 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21265 it exceeds SIZE. If only two probes are needed, this will not
21266 generate any code. Then probe at FIRST + SIZE. */
21267 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21268 {
21269 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21270 emit_stack_probe (reg1);
21271 }
21272
21273 rem = size - (i - PROBE_INTERVAL);
21274 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21275 {
21276 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21277 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21278 }
21279 else
21280 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21281 }
21282
21283 /* Otherwise, do the same as above, but in a loop. Note that we must be
21284 extra careful with variables wrapping around because we might be at
21285 the very top (or the very bottom) of the address space and we have
21286 to be able to handle this case properly; in particular, we use an
21287 equality test for the loop condition. */
21288 else
21289 {
21290 HOST_WIDE_INT rounded_size;
21291 struct scratch_reg sr;
21292
21293 get_scratch_register_on_entry (&sr, regno1, live_regs);
21294
21295 emit_move_insn (reg1, GEN_INT (first));
21296
21297
21298 /* Step 1: round SIZE to the previous multiple of the interval. */
21299
21300 rounded_size = size & -PROBE_INTERVAL;
21301 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21302
21303
21304 /* Step 2: compute initial and final value of the loop counter. */
21305
21306 /* TEST_ADDR = SP + FIRST. */
21307 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21308
21309 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21310 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21311
21312
21313 /* Step 3: the loop
21314
21315 do
21316 {
21317 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21318 probe at TEST_ADDR
21319 }
21320 while (TEST_ADDR != LAST_ADDR)
21321
21322 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21323 until it is equal to ROUNDED_SIZE. */
21324
21325 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21326
21327
21328 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21329 that SIZE is equal to ROUNDED_SIZE. */
21330
21331 if (size != rounded_size)
21332 {
21333 HOST_WIDE_INT rem = size - rounded_size;
21334
21335 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21336 {
21337 emit_set_insn (sr.reg,
21338 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21339 emit_stack_probe (plus_constant (Pmode, sr.reg,
21340 PROBE_INTERVAL - rem));
21341 }
21342 else
21343 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21344 }
21345
21346 release_scratch_register_on_entry (&sr);
21347 }
21348
21349 /* Make sure nothing is scheduled before we are done. */
21350 emit_insn (gen_blockage ());
21351 }
21352
21353 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21354 absolute addresses. */
21355
21356 const char *
21357 output_probe_stack_range (rtx reg1, rtx reg2)
21358 {
21359 static int labelno = 0;
21360 char loop_lab[32];
21361 rtx xops[2];
21362
21363 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21364
21365 /* Loop. */
21366 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21367
21368 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21369 xops[0] = reg1;
21370 xops[1] = GEN_INT (PROBE_INTERVAL);
21371 output_asm_insn ("sub\t%0, %0, %1", xops);
21372
21373 /* Probe at TEST_ADDR. */
21374 output_asm_insn ("str\tr0, [%0, #0]", xops);
21375
21376 /* Test if TEST_ADDR == LAST_ADDR. */
21377 xops[1] = reg2;
21378 output_asm_insn ("cmp\t%0, %1", xops);
21379
21380 /* Branch. */
21381 fputs ("\tbne\t", asm_out_file);
21382 assemble_name_raw (asm_out_file, loop_lab);
21383 fputc ('\n', asm_out_file);
21384
21385 return "";
21386 }
21387
21388 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21389 function. */
21390 void
21391 arm_expand_prologue (void)
21392 {
21393 rtx amount;
21394 rtx insn;
21395 rtx ip_rtx;
21396 unsigned long live_regs_mask;
21397 unsigned long func_type;
21398 int fp_offset = 0;
21399 int saved_pretend_args = 0;
21400 int saved_regs = 0;
21401 unsigned HOST_WIDE_INT args_to_push;
21402 HOST_WIDE_INT size;
21403 arm_stack_offsets *offsets;
21404 bool clobber_ip;
21405
21406 func_type = arm_current_func_type ();
21407
21408 /* Naked functions don't have prologues. */
21409 if (IS_NAKED (func_type))
21410 {
21411 if (flag_stack_usage_info)
21412 current_function_static_stack_size = 0;
21413 return;
21414 }
21415
21416 /* Make a copy of crtl->args.pretend_args_size; we may need to modify it locally. */
21417 args_to_push = crtl->args.pretend_args_size;
21418
21419 /* Compute which register we will have to save onto the stack. */
21420 offsets = arm_get_frame_offsets ();
21421 live_regs_mask = offsets->saved_regs_mask;
21422
21423 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21424
21425 if (IS_STACKALIGN (func_type))
21426 {
21427 rtx r0, r1;
21428
21429 /* Handle a word-aligned stack pointer. We generate the following:
21430
21431 mov r0, sp
21432 bic r1, r0, #7
21433 mov sp, r1
21434 <save and restore r0 in normal prologue/epilogue>
21435 mov sp, r0
21436 bx lr
21437
21438 The unwinder doesn't need to know about the stack realignment.
21439 Just tell it we saved SP in r0. */
21440 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21441
21442 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21443 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21444
21445 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21446 RTX_FRAME_RELATED_P (insn) = 1;
21447 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21448
21449 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21450
21451 /* ??? The CFA changes here, which may cause GDB to conclude that it
21452 has entered a different function. That said, the unwind info is
21453 correct, individually, before and after this instruction because
21454 we've described the save of SP, which will override the default
21455 handling of SP as restoring from the CFA. */
21456 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21457 }
21458
21459 /* The static chain register is the same as the IP register. If it is
21460 clobbered when creating the frame, we need to save and restore it. */
21461 clobber_ip = IS_NESTED (func_type)
21462 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21463 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21464 && !df_regs_ever_live_p (LR_REGNUM)
21465 && arm_r3_live_at_start_p ()));
21466
21467 /* Find somewhere to store IP whilst the frame is being created.
21468 We try the following places in order:
21469
21470 1. The last argument register r3 if it is available.
21471 2. A slot on the stack above the frame if there are no
21472 arguments to push onto the stack.
21473 3. Register r3 again, after pushing the argument registers
21474 onto the stack, if this is a varargs function.
21475 4. The last slot on the stack created for the arguments to
21476 push, if this isn't a varargs function.
21477
21478 Note - we only need to tell the dwarf2 backend about the SP
21479 adjustment in the second variant; the static chain register
21480 doesn't need to be unwound, as it doesn't contain a value
21481 inherited from the caller. */
21482 if (clobber_ip)
21483 {
21484 if (!arm_r3_live_at_start_p ())
21485 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21486 else if (args_to_push == 0)
21487 {
21488 rtx addr, dwarf;
21489
21490 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21491 saved_regs += 4;
21492
21493 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21494 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21495 fp_offset = 4;
21496
21497 /* Just tell the dwarf backend that we adjusted SP. */
21498 dwarf = gen_rtx_SET (stack_pointer_rtx,
21499 plus_constant (Pmode, stack_pointer_rtx,
21500 -fp_offset));
21501 RTX_FRAME_RELATED_P (insn) = 1;
21502 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21503 }
21504 else
21505 {
21506 /* Store the args on the stack. */
21507 if (cfun->machine->uses_anonymous_args)
21508 {
21509 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21510 (0xf0 >> (args_to_push / 4)) & 0xf);
21511 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21512 saved_pretend_args = 1;
21513 }
21514 else
21515 {
21516 rtx addr, dwarf;
21517
21518 if (args_to_push == 4)
21519 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21520 else
21521 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21522 plus_constant (Pmode,
21523 stack_pointer_rtx,
21524 -args_to_push));
21525
21526 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21527
21528 /* Just tell the dwarf backend that we adjusted SP. */
21529 dwarf = gen_rtx_SET (stack_pointer_rtx,
21530 plus_constant (Pmode, stack_pointer_rtx,
21531 -args_to_push));
21532 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21533 }
21534
21535 RTX_FRAME_RELATED_P (insn) = 1;
21536 fp_offset = args_to_push;
21537 args_to_push = 0;
21538 }
21539 }
21540
21541 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21542 {
21543 if (IS_INTERRUPT (func_type))
21544 {
21545 /* Interrupt functions must not corrupt any registers.
21546 Creating a frame pointer, however, corrupts the IP
21547 register, so we must push it first. */
21548 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21549
21550 /* Do not set RTX_FRAME_RELATED_P on this insn.
21551 The dwarf stack unwinding code only wants to see one
21552 stack decrement per function, and this is not it. If
21553 this instruction is labeled as being part of the frame
21554 creation sequence then dwarf2out_frame_debug_expr will
21555 die when it encounters the assignment of IP to FP
21556 later on, since the use of SP here establishes SP as
21557 the CFA register and not IP.
21558
21559 Anyway this instruction is not really part of the stack
21560 frame creation although it is part of the prologue. */
21561 }
21562
21563 insn = emit_set_insn (ip_rtx,
21564 plus_constant (Pmode, stack_pointer_rtx,
21565 fp_offset));
21566 RTX_FRAME_RELATED_P (insn) = 1;
21567 }
21568
21569 if (args_to_push)
21570 {
21571 /* Push the argument registers, or reserve space for them. */
21572 if (cfun->machine->uses_anonymous_args)
21573 insn = emit_multi_reg_push
21574 ((0xf0 >> (args_to_push / 4)) & 0xf,
21575 (0xf0 >> (args_to_push / 4)) & 0xf);
21576 else
21577 insn = emit_insn
21578 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21579 GEN_INT (- args_to_push)));
21580 RTX_FRAME_RELATED_P (insn) = 1;
21581 }
21582
21583 /* If this is an interrupt service routine, and the link register
21584 is going to be pushed, and we're not generating an extra push of
21585 IP (needed when the frame pointer is required under the APCS
21586 frame layout), then subtracting four from LR now means that the
21587 function return can be done with a single instruction. */
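/* Illustrative example (not actual compiler output): with LR pre-adjusted
   and saved, the epilogue can return by popping straight into the PC,
   e.g. "ldmfd sp!, {..., pc}^", instead of first correcting LR with a
   separate "subs pc, lr, #4".  */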
21588 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21589 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21590 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21591 && TARGET_ARM)
21592 {
21593 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21594
21595 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21596 }
21597
21598 if (live_regs_mask)
21599 {
21600 unsigned long dwarf_regs_mask = live_regs_mask;
21601
21602 saved_regs += bit_count (live_regs_mask) * 4;
21603 if (optimize_size && !frame_pointer_needed
21604 && saved_regs == offsets->saved_regs - offsets->saved_args)
21605 {
21606 /* If no coprocessor registers are being pushed and we don't have
21607 to worry about a frame pointer then push extra registers to
21608 create the stack frame. This is done in a way that does not
21609 alter the frame layout, so is independent of the epilogue. */
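/* For instance (illustrative): if the frame needs another 8 bytes and the
   lowest live core register is r2 or above, r0 and r1 are added to the
   push list below, avoiding a separate "sub sp, sp, #8".  */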
21610 int n;
21611 int frame;
21612 n = 0;
21613 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21614 n++;
21615 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21616 if (frame && n * 4 >= frame)
21617 {
21618 n = frame / 4;
21619 live_regs_mask |= (1 << n) - 1;
21620 saved_regs += frame;
21621 }
21622 }
21623
21624 if (TARGET_LDRD
21625 && current_tune->prefer_ldrd_strd
21626 && !optimize_function_for_size_p (cfun))
21627 {
21628 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21629 if (TARGET_THUMB2)
21630 thumb2_emit_strd_push (live_regs_mask);
21631 else if (TARGET_ARM
21632 && !TARGET_APCS_FRAME
21633 && !IS_INTERRUPT (func_type))
21634 arm_emit_strd_push (live_regs_mask);
21635 else
21636 {
21637 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21638 RTX_FRAME_RELATED_P (insn) = 1;
21639 }
21640 }
21641 else
21642 {
21643 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21644 RTX_FRAME_RELATED_P (insn) = 1;
21645 }
21646 }
21647
21648 if (! IS_VOLATILE (func_type))
21649 saved_regs += arm_save_coproc_regs ();
21650
21651 if (frame_pointer_needed && TARGET_ARM)
21652 {
21653 /* Create the new frame pointer. */
21654 if (TARGET_APCS_FRAME)
21655 {
21656 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21657 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21658 RTX_FRAME_RELATED_P (insn) = 1;
21659 }
21660 else
21661 {
21662 insn = GEN_INT (saved_regs - (4 + fp_offset));
21663 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21664 stack_pointer_rtx, insn));
21665 RTX_FRAME_RELATED_P (insn) = 1;
21666 }
21667 }
21668
21669 size = offsets->outgoing_args - offsets->saved_args;
21670 if (flag_stack_usage_info)
21671 current_function_static_stack_size = size;
21672
21673 /* If this isn't an interrupt service routine and we have a frame, then do
21674 stack checking. We use IP as the first scratch register, except for the
21675 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21676 if (!IS_INTERRUPT (func_type)
21677 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21678 {
21679 unsigned int regno;
21680
21681 if (!IS_NESTED (func_type) || clobber_ip)
21682 regno = IP_REGNUM;
21683 else if (df_regs_ever_live_p (LR_REGNUM))
21684 regno = LR_REGNUM;
21685 else
21686 regno = 3;
21687
21688 if (crtl->is_leaf && !cfun->calls_alloca)
21689 {
21690 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21691 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21692 size - STACK_CHECK_PROTECT,
21693 regno, live_regs_mask);
21694 }
21695 else if (size > 0)
21696 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21697 regno, live_regs_mask);
21698 }
21699
21700 /* Recover the static chain register. */
21701 if (clobber_ip)
21702 {
21703 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21704 insn = gen_rtx_REG (SImode, 3);
21705 else
21706 {
21707 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21708 insn = gen_frame_mem (SImode, insn);
21709 }
21710 emit_set_insn (ip_rtx, insn);
21711 emit_insn (gen_force_register_use (ip_rtx));
21712 }
21713
21714 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21715 {
21716 /* This add can produce multiple insns for a large constant, so we
21717 need to get tricky. */
21718 rtx_insn *last = get_last_insn ();
21719
21720 amount = GEN_INT (offsets->saved_args + saved_regs
21721 - offsets->outgoing_args);
21722
21723 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21724 amount));
21725 do
21726 {
21727 last = last ? NEXT_INSN (last) : get_insns ();
21728 RTX_FRAME_RELATED_P (last) = 1;
21729 }
21730 while (last != insn);
21731
21732 /* If the frame pointer is needed, emit a special barrier that
21733 will prevent the scheduler from moving stores to the frame
21734 before the stack adjustment. */
21735 if (frame_pointer_needed)
21736 emit_insn (gen_stack_tie (stack_pointer_rtx,
21737 hard_frame_pointer_rtx));
21738 }
21739
21740
21741 if (frame_pointer_needed && TARGET_THUMB2)
21742 thumb_set_frame_pointer (offsets);
21743
21744 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21745 {
21746 unsigned long mask;
21747
21748 mask = live_regs_mask;
21749 mask &= THUMB2_WORK_REGS;
21750 if (!IS_NESTED (func_type))
21751 mask |= (1 << IP_REGNUM);
21752 arm_load_pic_register (mask);
21753 }
21754
21755 /* If we are profiling, make sure no instructions are scheduled before
21756 the call to mcount. Likewise if the user has requested no
21757 scheduling in the prologue. Likewise if we want non-call exceptions
21758 using the EABI unwinder, to prevent faulting instructions from being
21759 swapped with a stack adjustment. */
21760 if (crtl->profile || !TARGET_SCHED_PROLOG
21761 || (arm_except_unwind_info (&global_options) == UI_TARGET
21762 && cfun->can_throw_non_call_exceptions))
21763 emit_insn (gen_blockage ());
21764
21765 /* If the link register is being kept alive, with the return address in it,
21766 then make sure that it does not get reused by the ce2 pass. */
21767 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21768 cfun->machine->lr_save_eliminated = 1;
21769 }
21770 \f
21771 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21772 static void
21773 arm_print_condition (FILE *stream)
21774 {
21775 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21776 {
21777 /* Branch conversion is not implemented for Thumb-2. */
21778 if (TARGET_THUMB)
21779 {
21780 output_operand_lossage ("predicated Thumb instruction");
21781 return;
21782 }
21783 if (current_insn_predicate != NULL)
21784 {
21785 output_operand_lossage
21786 ("predicated instruction in conditional sequence");
21787 return;
21788 }
21789
21790 fputs (arm_condition_codes[arm_current_cc], stream);
21791 }
21792 else if (current_insn_predicate)
21793 {
21794 enum arm_cond_code code;
21795
21796 if (TARGET_THUMB1)
21797 {
21798 output_operand_lossage ("predicated Thumb instruction");
21799 return;
21800 }
21801
21802 code = get_arm_condition_code (current_insn_predicate);
21803 fputs (arm_condition_codes[code], stream);
21804 }
21805 }
21806
21807
21808 /* Globally reserved letters: acln
21809 Punctuation letters currently used: @_|?().!#
21810 Lower case letters currently used: bcdefhimpqtvwxyz
21811 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21812 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21813
21814 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21815
21816 If CODE is 'd', then the X is a condition operand and the instruction
21817 should only be executed if the condition is true.
21818 If CODE is 'D', then the X is a condition operand and the instruction
21819 should only be executed if the condition is false: however, if the mode
21820 of the comparison is CCFPEmode, then always execute the instruction -- we
21821 do this because in these circumstances !GE does not necessarily imply LT;
21822 in these cases the instruction pattern will take care to make sure that
21823 an instruction containing %d will follow, thereby undoing the effects of
21824 doing this instruction unconditionally.
21825 If CODE is 'N' then X is a floating point operand that must be negated
21826 before output.
21827 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21828 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
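/* A few illustrative examples (not exhaustive): for (const_int 5), '%B'
   prints -6 (the bitwise inverse) and '%x' prints #0x5; for a DImode value
   held starting in r0, '%M' prints {r0-r1}.  */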
21829 static void
21830 arm_print_operand (FILE *stream, rtx x, int code)
21831 {
21832 switch (code)
21833 {
21834 case '@':
21835 fputs (ASM_COMMENT_START, stream);
21836 return;
21837
21838 case '_':
21839 fputs (user_label_prefix, stream);
21840 return;
21841
21842 case '|':
21843 fputs (REGISTER_PREFIX, stream);
21844 return;
21845
21846 case '?':
21847 arm_print_condition (stream);
21848 return;
21849
21850 case '.':
21851 /* The current condition code for a condition code setting instruction.
21852 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21853 fputc ('s', stream);
21854 arm_print_condition (stream);
21855 return;
21856
21857 case '!':
21858 /* If the instruction is conditionally executed then print
21859 the current condition code, otherwise print 's'. */
21860 gcc_assert (TARGET_THUMB2);
21861 if (current_insn_predicate)
21862 arm_print_condition (stream);
21863 else
21864 fputc ('s', stream);
21865 break;
21866
21867 /* %# is a "break" sequence. It doesn't output anything, but is used to
21868 separate e.g. operand numbers from following text, if that text consists
21869 of further digits which we don't want to be part of the operand
21870 number. */
21871 case '#':
21872 return;
21873
21874 case 'N':
21875 {
21876 REAL_VALUE_TYPE r;
21877 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21878 fprintf (stream, "%s", fp_const_from_val (&r));
21879 }
21880 return;
21881
21882 /* An integer or symbol address without a preceding # sign. */
21883 case 'c':
21884 switch (GET_CODE (x))
21885 {
21886 case CONST_INT:
21887 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21888 break;
21889
21890 case SYMBOL_REF:
21891 output_addr_const (stream, x);
21892 break;
21893
21894 case CONST:
21895 if (GET_CODE (XEXP (x, 0)) == PLUS
21896 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21897 {
21898 output_addr_const (stream, x);
21899 break;
21900 }
21901 /* Fall through. */
21902
21903 default:
21904 output_operand_lossage ("Unsupported operand for code '%c'", code);
21905 }
21906 return;
21907
21908 /* An integer that we want to print in HEX. */
21909 case 'x':
21910 switch (GET_CODE (x))
21911 {
21912 case CONST_INT:
21913 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21914 break;
21915
21916 default:
21917 output_operand_lossage ("Unsupported operand for code '%c'", code);
21918 }
21919 return;
21920
21921 case 'B':
21922 if (CONST_INT_P (x))
21923 {
21924 HOST_WIDE_INT val;
21925 val = ARM_SIGN_EXTEND (~INTVAL (x));
21926 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21927 }
21928 else
21929 {
21930 putc ('~', stream);
21931 output_addr_const (stream, x);
21932 }
21933 return;
21934
21935 case 'b':
21936 /* Print the log2 of a CONST_INT. */
21937 {
21938 HOST_WIDE_INT val;
21939
21940 if (!CONST_INT_P (x)
21941 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21942 output_operand_lossage ("Unsupported operand for code '%c'", code);
21943 else
21944 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21945 }
21946 return;
21947
21948 case 'L':
21949 /* The low 16 bits of an immediate constant. */
21950 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21951 return;
21952
21953 case 'i':
21954 fprintf (stream, "%s", arithmetic_instr (x, 1));
21955 return;
21956
21957 case 'I':
21958 fprintf (stream, "%s", arithmetic_instr (x, 0));
21959 return;
21960
21961 case 'S':
21962 {
21963 HOST_WIDE_INT val;
21964 const char *shift;
21965
21966 shift = shift_op (x, &val);
21967
21968 if (shift)
21969 {
21970 fprintf (stream, ", %s ", shift);
21971 if (val == -1)
21972 arm_print_operand (stream, XEXP (x, 1), 0);
21973 else
21974 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21975 }
21976 }
21977 return;
21978
21979 /* An explanation of the 'Q', 'R' and 'H' register operands:
21980
21981 In a pair of registers containing a DI or DF value the 'Q'
21982 operand returns the register number of the register containing
21983 the least significant part of the value. The 'R' operand returns
21984 the register number of the register containing the most
21985 significant part of the value.
21986
21987 The 'H' operand returns the higher of the two register numbers.
21988 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21989 same as the 'Q' operand, since the most significant part of the
21990 value is held in the lower number register. The reverse is true
21991 on systems where WORDS_BIG_ENDIAN is false.
21992
21993 The purpose of these operands is to distinguish between cases
21994 where the endian-ness of the values is important (for example
21995 when they are added together), and cases where the endian-ness
21996 is irrelevant, but the order of register operations is important.
21997 For example when loading a value from memory into a register
21998 pair, the endian-ness does not matter. Provided that the value
21999 from the lower memory address is put into the lower numbered
22000 register, and the value from the higher address is put into the
22001 higher numbered register, the load will work regardless of whether
22002 the value being loaded is big-wordian or little-wordian. The
22003 order of the two register loads can matter however, if the address
22004 of the memory location is actually held in one of the registers
22005 being overwritten by the load.
22006
22007 The 'Q' and 'R' constraints are also available for 64-bit
22008 constants. */
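/* For example (illustrative), with a DImode value in r0/r1 on a target
   where WORDS_BIG_ENDIAN is false: '%Q' prints r0 (the least significant
   word), '%R' prints r1 (the most significant word), and '%H' prints r1,
   the higher-numbered register, regardless of endianness.  */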
22009 case 'Q':
22010 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22011 {
22012 rtx part = gen_lowpart (SImode, x);
22013 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22014 return;
22015 }
22016
22017 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22018 {
22019 output_operand_lossage ("invalid operand for code '%c'", code);
22020 return;
22021 }
22022
22023 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22024 return;
22025
22026 case 'R':
22027 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22028 {
22029 machine_mode mode = GET_MODE (x);
22030 rtx part;
22031
22032 if (mode == VOIDmode)
22033 mode = DImode;
22034 part = gen_highpart_mode (SImode, mode, x);
22035 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22036 return;
22037 }
22038
22039 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22040 {
22041 output_operand_lossage ("invalid operand for code '%c'", code);
22042 return;
22043 }
22044
22045 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22046 return;
22047
22048 case 'H':
22049 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22050 {
22051 output_operand_lossage ("invalid operand for code '%c'", code);
22052 return;
22053 }
22054
22055 asm_fprintf (stream, "%r", REGNO (x) + 1);
22056 return;
22057
22058 case 'J':
22059 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22066 return;
22067
22068 case 'K':
22069 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22070 {
22071 output_operand_lossage ("invalid operand for code '%c'", code);
22072 return;
22073 }
22074
22075 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22076 return;
22077
22078 case 'm':
22079 asm_fprintf (stream, "%r",
22080 REG_P (XEXP (x, 0))
22081 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22082 return;
22083
22084 case 'M':
22085 asm_fprintf (stream, "{%r-%r}",
22086 REGNO (x),
22087 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22088 return;
22089
22090 /* Like 'M', but writing doubleword vector registers, for use by Neon
22091 insns. */
22092 case 'h':
22093 {
22094 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22095 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22096 if (numregs == 1)
22097 asm_fprintf (stream, "{d%d}", regno);
22098 else
22099 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22100 }
22101 return;
22102
22103 case 'd':
22104 /* CONST_TRUE_RTX means always -- that's the default. */
22105 if (x == const_true_rtx)
22106 return;
22107
22108 if (!COMPARISON_P (x))
22109 {
22110 output_operand_lossage ("invalid operand for code '%c'", code);
22111 return;
22112 }
22113
22114 fputs (arm_condition_codes[get_arm_condition_code (x)],
22115 stream);
22116 return;
22117
22118 case 'D':
22119 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22120 want to do that. */
22121 if (x == const_true_rtx)
22122 {
22123 output_operand_lossage ("instruction never executed");
22124 return;
22125 }
22126 if (!COMPARISON_P (x))
22127 {
22128 output_operand_lossage ("invalid operand for code '%c'", code);
22129 return;
22130 }
22131
22132 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22133 (get_arm_condition_code (x))],
22134 stream);
22135 return;
22136
22137 case 's':
22138 case 'V':
22139 case 'W':
22140 case 'X':
22141 case 'Y':
22142 case 'Z':
22143 /* Former Maverick support, removed after GCC-4.7. */
22144 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22145 return;
22146
22147 case 'U':
22148 if (!REG_P (x)
22149 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22150 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22151 /* Bad value for wCG register number. */
22152 {
22153 output_operand_lossage ("invalid operand for code '%c'", code);
22154 return;
22155 }
22156
22157 else
22158 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22159 return;
22160
22161 /* Print an iWMMXt control register name. */
22162 case 'w':
22163 if (!CONST_INT_P (x)
22164 || INTVAL (x) < 0
22165 || INTVAL (x) >= 16)
22166 /* Bad value for wC register number. */
22167 {
22168 output_operand_lossage ("invalid operand for code '%c'", code);
22169 return;
22170 }
22171
22172 else
22173 {
22174 static const char * wc_reg_names [16] =
22175 {
22176 "wCID", "wCon", "wCSSF", "wCASF",
22177 "wC4", "wC5", "wC6", "wC7",
22178 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22179 "wC12", "wC13", "wC14", "wC15"
22180 };
22181
22182 fputs (wc_reg_names [INTVAL (x)], stream);
22183 }
22184 return;
22185
22186 /* Print the high single-precision register of a VFP double-precision
22187 register. */
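/* For example (illustrative), for the register d1 this prints s3, the
   upper half of the s2/s3 pair that overlaps d1.  */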
22188 case 'p':
22189 {
22190 machine_mode mode = GET_MODE (x);
22191 int regno;
22192
22193 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22194 {
22195 output_operand_lossage ("invalid operand for code '%c'", code);
22196 return;
22197 }
22198
22199 regno = REGNO (x);
22200 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22201 {
22202 output_operand_lossage ("invalid operand for code '%c'", code);
22203 return;
22204 }
22205
22206 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22207 }
22208 return;
22209
22210 /* Print a VFP/Neon double precision or quad precision register name. */
22211 case 'P':
22212 case 'q':
22213 {
22214 machine_mode mode = GET_MODE (x);
22215 int is_quad = (code == 'q');
22216 int regno;
22217
22218 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22219 {
22220 output_operand_lossage ("invalid operand for code '%c'", code);
22221 return;
22222 }
22223
22224 if (!REG_P (x)
22225 || !IS_VFP_REGNUM (REGNO (x)))
22226 {
22227 output_operand_lossage ("invalid operand for code '%c'", code);
22228 return;
22229 }
22230
22231 regno = REGNO (x);
22232 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22233 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22234 {
22235 output_operand_lossage ("invalid operand for code '%c'", code);
22236 return;
22237 }
22238
22239 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22240 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22241 }
22242 return;
22243
22244 /* These two codes print the low/high doubleword register of a Neon quad
22245 register, respectively. For pair-structure types, can also print
22246 low/high quadword registers. */
22247 case 'e':
22248 case 'f':
22249 {
22250 machine_mode mode = GET_MODE (x);
22251 int regno;
22252
22253 if ((GET_MODE_SIZE (mode) != 16
22254 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22255 {
22256 output_operand_lossage ("invalid operand for code '%c'", code);
22257 return;
22258 }
22259
22260 regno = REGNO (x);
22261 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22262 {
22263 output_operand_lossage ("invalid operand for code '%c'", code);
22264 return;
22265 }
22266
22267 if (GET_MODE_SIZE (mode) == 16)
22268 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22269 + (code == 'f' ? 1 : 0));
22270 else
22271 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22272 + (code == 'f' ? 1 : 0));
22273 }
22274 return;
22275
22276 /* Print a VFPv3 floating-point constant, represented as an integer
22277 index. */
22278 case 'G':
22279 {
22280 int index = vfp3_const_double_index (x);
22281 gcc_assert (index != -1);
22282 fprintf (stream, "%d", index);
22283 }
22284 return;
22285
22286 /* Print bits representing opcode features for Neon.
22287
22288 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22289 and polynomials as unsigned.
22290
22291 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22292
22293 Bit 2 is 1 for rounding functions, 0 otherwise. */
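/* For example (illustrative), for a bits value of 6 (binary 110: rounding,
   float/poly, unsigned, i.e. a rounding polynomial operation) 'T' prints
   'p', 'F' prints 'p', 't' prints 'u' and 'O' prints "r".  */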
22294
22295 /* Identify the type as 's', 'u', 'p' or 'f'. */
22296 case 'T':
22297 {
22298 HOST_WIDE_INT bits = INTVAL (x);
22299 fputc ("uspf"[bits & 3], stream);
22300 }
22301 return;
22302
22303 /* Likewise, but signed and unsigned integers are both 'i'. */
22304 case 'F':
22305 {
22306 HOST_WIDE_INT bits = INTVAL (x);
22307 fputc ("iipf"[bits & 3], stream);
22308 }
22309 return;
22310
22311 /* As for 'T', but emit 'u' instead of 'p'. */
22312 case 't':
22313 {
22314 HOST_WIDE_INT bits = INTVAL (x);
22315 fputc ("usuf"[bits & 3], stream);
22316 }
22317 return;
22318
22319 /* Bit 2: rounding (vs none). */
22320 case 'O':
22321 {
22322 HOST_WIDE_INT bits = INTVAL (x);
22323 fputs ((bits & 4) != 0 ? "r" : "", stream);
22324 }
22325 return;
22326
22327 /* Memory operand for vld1/vst1 instruction. */
22328 case 'A':
22329 {
22330 rtx addr;
22331 bool postinc = FALSE;
22332 rtx postinc_reg = NULL;
22333 unsigned align, memsize, align_bits;
22334
22335 gcc_assert (MEM_P (x));
22336 addr = XEXP (x, 0);
22337 if (GET_CODE (addr) == POST_INC)
22338 {
22339 postinc = 1;
22340 addr = XEXP (addr, 0);
22341 }
22342 if (GET_CODE (addr) == POST_MODIFY)
22343 {
22344 postinc_reg = XEXP (XEXP (addr, 1), 1);
22345 addr = XEXP (addr, 0);
22346 }
22347 asm_fprintf (stream, "[%r", REGNO (addr));
22348
22349 /* We know the alignment of this access, so we can emit a hint in the
22350 instruction (for some alignments) as an aid to the memory subsystem
22351 of the target. */
22352 align = MEM_ALIGN (x) >> 3;
22353 memsize = MEM_SIZE (x);
22354
22355 /* Only certain alignment specifiers are supported by the hardware. */
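/* For instance (illustrative), a 16-byte access known to be 16-byte
   aligned is printed with a ":128" hint, e.g. "[r0:128]".  */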
22356 if (memsize == 32 && (align % 32) == 0)
22357 align_bits = 256;
22358 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22359 align_bits = 128;
22360 else if (memsize >= 8 && (align % 8) == 0)
22361 align_bits = 64;
22362 else
22363 align_bits = 0;
22364
22365 if (align_bits != 0)
22366 asm_fprintf (stream, ":%d", align_bits);
22367
22368 asm_fprintf (stream, "]");
22369
22370 if (postinc)
22371 fputs ("!", stream);
22372 if (postinc_reg)
22373 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22374 }
22375 return;
22376
22377 case 'C':
22378 {
22379 rtx addr;
22380
22381 gcc_assert (MEM_P (x));
22382 addr = XEXP (x, 0);
22383 gcc_assert (REG_P (addr));
22384 asm_fprintf (stream, "[%r]", REGNO (addr));
22385 }
22386 return;
22387
22388 /* Translate an S register number into a D register number and element index. */
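/* For example (illustrative), s3 is printed as d1[1].  */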
22389 case 'y':
22390 {
22391 machine_mode mode = GET_MODE (x);
22392 int regno;
22393
22394 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22395 {
22396 output_operand_lossage ("invalid operand for code '%c'", code);
22397 return;
22398 }
22399
22400 regno = REGNO (x);
22401 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22402 {
22403 output_operand_lossage ("invalid operand for code '%c'", code);
22404 return;
22405 }
22406
22407 regno = regno - FIRST_VFP_REGNUM;
22408 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22409 }
22410 return;
22411
22412 case 'v':
22413 gcc_assert (CONST_DOUBLE_P (x));
22414 int result;
22415 result = vfp3_const_double_for_fract_bits (x);
22416 if (result == 0)
22417 result = vfp3_const_double_for_bits (x);
22418 fprintf (stream, "#%d", result);
22419 return;
22420
22421 /* Register specifier for vld1.16/vst1.16. Translate the S register
22422 number into a D register number and element index. */
22423 case 'z':
22424 {
22425 machine_mode mode = GET_MODE (x);
22426 int regno;
22427
22428 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22429 {
22430 output_operand_lossage ("invalid operand for code '%c'", code);
22431 return;
22432 }
22433
22434 regno = REGNO (x);
22435 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22436 {
22437 output_operand_lossage ("invalid operand for code '%c'", code);
22438 return;
22439 }
22440
22441 regno = regno - FIRST_VFP_REGNUM;
22442 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22443 }
22444 return;
22445
22446 default:
22447 if (x == 0)
22448 {
22449 output_operand_lossage ("missing operand");
22450 return;
22451 }
22452
22453 switch (GET_CODE (x))
22454 {
22455 case REG:
22456 asm_fprintf (stream, "%r", REGNO (x));
22457 break;
22458
22459 case MEM:
22460 output_address (GET_MODE (x), XEXP (x, 0));
22461 break;
22462
22463 case CONST_DOUBLE:
22464 {
22465 char fpstr[20];
22466 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22467 sizeof (fpstr), 0, 1);
22468 fprintf (stream, "#%s", fpstr);
22469 }
22470 break;
22471
22472 default:
22473 gcc_assert (GET_CODE (x) != NEG);
22474 fputc ('#', stream);
22475 if (GET_CODE (x) == HIGH)
22476 {
22477 fputs (":lower16:", stream);
22478 x = XEXP (x, 0);
22479 }
22480
22481 output_addr_const (stream, x);
22482 break;
22483 }
22484 }
22485 }
22486 \f
22487 /* Target hook for printing a memory address. */
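/* For example (illustrative, register names are examples only): in 32-bit
   state a (plus (reg) (const_int 4)) address prints as "[r0, #4]", a
   register-offset address as "[r0, r1]", and a SImode post-increment as
   "[r0], #4".  */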
22488 static void
22489 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22490 {
22491 if (TARGET_32BIT)
22492 {
22493 int is_minus = GET_CODE (x) == MINUS;
22494
22495 if (REG_P (x))
22496 asm_fprintf (stream, "[%r]", REGNO (x));
22497 else if (GET_CODE (x) == PLUS || is_minus)
22498 {
22499 rtx base = XEXP (x, 0);
22500 rtx index = XEXP (x, 1);
22501 HOST_WIDE_INT offset = 0;
22502 if (!REG_P (base)
22503 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22504 {
22505 /* Ensure that BASE is a register (one of them must be).
22506 Also ensure that SP is not used as an index
22507 register. */
22508 std::swap (base, index);
22509 }
22510 switch (GET_CODE (index))
22511 {
22512 case CONST_INT:
22513 offset = INTVAL (index);
22514 if (is_minus)
22515 offset = -offset;
22516 asm_fprintf (stream, "[%r, #%wd]",
22517 REGNO (base), offset);
22518 break;
22519
22520 case REG:
22521 asm_fprintf (stream, "[%r, %s%r]",
22522 REGNO (base), is_minus ? "-" : "",
22523 REGNO (index));
22524 break;
22525
22526 case MULT:
22527 case ASHIFTRT:
22528 case LSHIFTRT:
22529 case ASHIFT:
22530 case ROTATERT:
22531 {
22532 asm_fprintf (stream, "[%r, %s%r",
22533 REGNO (base), is_minus ? "-" : "",
22534 REGNO (XEXP (index, 0)));
22535 arm_print_operand (stream, index, 'S');
22536 fputs ("]", stream);
22537 break;
22538 }
22539
22540 default:
22541 gcc_unreachable ();
22542 }
22543 }
22544 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22545 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22546 {
22547 gcc_assert (REG_P (XEXP (x, 0)));
22548
22549 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22550 asm_fprintf (stream, "[%r, #%s%d]!",
22551 REGNO (XEXP (x, 0)),
22552 GET_CODE (x) == PRE_DEC ? "-" : "",
22553 GET_MODE_SIZE (mode));
22554 else
22555 asm_fprintf (stream, "[%r], #%s%d",
22556 REGNO (XEXP (x, 0)),
22557 GET_CODE (x) == POST_DEC ? "-" : "",
22558 GET_MODE_SIZE (mode));
22559 }
22560 else if (GET_CODE (x) == PRE_MODIFY)
22561 {
22562 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22563 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22564 asm_fprintf (stream, "#%wd]!",
22565 INTVAL (XEXP (XEXP (x, 1), 1)));
22566 else
22567 asm_fprintf (stream, "%r]!",
22568 REGNO (XEXP (XEXP (x, 1), 1)));
22569 }
22570 else if (GET_CODE (x) == POST_MODIFY)
22571 {
22572 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22573 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22574 asm_fprintf (stream, "#%wd",
22575 INTVAL (XEXP (XEXP (x, 1), 1)));
22576 else
22577 asm_fprintf (stream, "%r",
22578 REGNO (XEXP (XEXP (x, 1), 1)));
22579 }
22580 else output_addr_const (stream, x);
22581 }
22582 else
22583 {
22584 if (REG_P (x))
22585 asm_fprintf (stream, "[%r]", REGNO (x));
22586 else if (GET_CODE (x) == POST_INC)
22587 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22588 else if (GET_CODE (x) == PLUS)
22589 {
22590 gcc_assert (REG_P (XEXP (x, 0)));
22591 if (CONST_INT_P (XEXP (x, 1)))
22592 asm_fprintf (stream, "[%r, #%wd]",
22593 REGNO (XEXP (x, 0)),
22594 INTVAL (XEXP (x, 1)));
22595 else
22596 asm_fprintf (stream, "[%r, %r]",
22597 REGNO (XEXP (x, 0)),
22598 REGNO (XEXP (x, 1)));
22599 }
22600 else
22601 output_addr_const (stream, x);
22602 }
22603 }
22604 \f
22605 /* Target hook for indicating whether a punctuation character for
22606 TARGET_PRINT_OPERAND is valid. */
22607 static bool
22608 arm_print_operand_punct_valid_p (unsigned char code)
22609 {
22610 return (code == '@' || code == '|' || code == '.'
22611 || code == '(' || code == ')' || code == '#'
22612 || (TARGET_32BIT && (code == '?'))
22613 || (TARGET_THUMB2 && (code == '!'))
22614 || (TARGET_THUMB && (code == '_')));
22615 }
22616 \f
22617 /* Target hook for assembling integer objects. The ARM version needs to
22618 handle word-sized values specially. */
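/* For example (illustrative), a word-sized reference to a symbol emitted
   while building a PIC constant table is printed as ".word sym(GOTOFF)"
   when the data is text-relative and the symbol binds locally, or as
   ".word sym(GOT)" otherwise ("sym" is a placeholder name).  */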
22619 static bool
22620 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22621 {
22622 machine_mode mode;
22623
22624 if (size == UNITS_PER_WORD && aligned_p)
22625 {
22626 fputs ("\t.word\t", asm_out_file);
22627 output_addr_const (asm_out_file, x);
22628
22629 /* Mark symbols as position independent. We only do this in the
22630 .text segment, not in the .data segment. */
22631 if (NEED_GOT_RELOC && flag_pic && making_const_table
22632 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22633 {
22634 /* See legitimize_pic_address for an explanation of the
22635 TARGET_VXWORKS_RTP check. */
22636 /* References to weak symbols cannot be resolved locally:
22637 they may be overridden by a non-weak definition at link
22638 time. */
22639 if (!arm_pic_data_is_text_relative
22640 || (GET_CODE (x) == SYMBOL_REF
22641 && (!SYMBOL_REF_LOCAL_P (x)
22642 || (SYMBOL_REF_DECL (x)
22643 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22644 fputs ("(GOT)", asm_out_file);
22645 else
22646 fputs ("(GOTOFF)", asm_out_file);
22647 }
22648 fputc ('\n', asm_out_file);
22649 return true;
22650 }
22651
22652 mode = GET_MODE (x);
22653
22654 if (arm_vector_mode_supported_p (mode))
22655 {
22656 int i, units;
22657
22658 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22659
22660 units = CONST_VECTOR_NUNITS (x);
22661 size = GET_MODE_UNIT_SIZE (mode);
22662
22663 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22664 for (i = 0; i < units; i++)
22665 {
22666 rtx elt = CONST_VECTOR_ELT (x, i);
22667 assemble_integer
22668 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22669 }
22670 else
22671 for (i = 0; i < units; i++)
22672 {
22673 rtx elt = CONST_VECTOR_ELT (x, i);
22674 assemble_real
22675 (*CONST_DOUBLE_REAL_VALUE (elt),
22676 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22677 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22678 }
22679
22680 return true;
22681 }
22682
22683 return default_assemble_integer (x, size, aligned_p);
22684 }
22685
22686 static void
22687 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22688 {
22689 section *s;
22690
22691 if (!TARGET_AAPCS_BASED)
22692 {
22693 (is_ctor ?
22694 default_named_section_asm_out_constructor
22695 : default_named_section_asm_out_destructor) (symbol, priority);
22696 return;
22697 }
22698
22699 /* Put these in the .init_array section, using a special relocation. */
22700 if (priority != DEFAULT_INIT_PRIORITY)
22701 {
22702 char buf[18];
22703 sprintf (buf, "%s.%.5u",
22704 is_ctor ? ".init_array" : ".fini_array",
22705 priority);
22706 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22707 }
22708 else if (is_ctor)
22709 s = ctors_section;
22710 else
22711 s = dtors_section;
22712
22713 switch_to_section (s);
22714 assemble_align (POINTER_SIZE);
22715 fputs ("\t.word\t", asm_out_file);
22716 output_addr_const (asm_out_file, symbol);
22717 fputs ("(target1)\n", asm_out_file);
22718 }
22719
22720 /* Add a function to the list of static constructors. */
22721
22722 static void
22723 arm_elf_asm_constructor (rtx symbol, int priority)
22724 {
22725 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22726 }
22727
22728 /* Add a function to the list of static destructors. */
22729
22730 static void
22731 arm_elf_asm_destructor (rtx symbol, int priority)
22732 {
22733 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22734 }
22735 \f
22736 /* A finite state machine takes care of noticing whether or not instructions
22737 can be conditionally executed, thus decreasing execution time and code
22738 size by deleting branch instructions. The fsm is controlled by
22739 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22740
22741 /* The states of the fsm controlling condition codes are:
22742 0: normal, do nothing special
22743 1: make ASM_OUTPUT_OPCODE not output this instruction
22744 2: make ASM_OUTPUT_OPCODE not output this instruction
22745 3: make instructions conditional
22746 4: make instructions conditional
22747
22748 State transitions (state->state by whom under condition):
22749 0 -> 1 final_prescan_insn if the `target' is a label
22750 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22751 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22752 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22753 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22754 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22755 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22756 (the target insn is arm_target_insn).
22757
22758 If the jump clobbers the conditions then we use states 2 and 4.
22759
22760 A similar thing can be done with conditional return insns.
22761
22762 XXX In case the `target' is an unconditional branch, this conditionalising
22763 of the instructions always reduces code size, but not always execution
22764 time. But then, I want to reduce the code size to somewhere near what
22765 /bin/cc produces. */
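/* As an illustration (not actual compiler output), the fsm conceptually
   turns

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   by suppressing the branch and predicating the instruction it would have
   skipped.  */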
22766
22767 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22768 instructions. When a COND_EXEC instruction is seen the subsequent
22769 instructions are scanned so that multiple conditional instructions can be
22770 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22771 specify the length and true/false mask for the IT block. These will be
22772 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
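/* For example (illustrative), two COND_EXEC insns under EQ and its inverse
   NE can be emitted as a single block:

	ite	eq
	moveq	r0, #1
	movne	r0, #0

   rather than under two separate one-instruction IT blocks.  */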
22773
22774 /* Returns the index of the ARM condition code string in
22775 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22776 COMPARISON should be an rtx like `(eq (...) (...))'. */
22777
22778 enum arm_cond_code
22779 maybe_get_arm_condition_code (rtx comparison)
22780 {
22781 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22782 enum arm_cond_code code;
22783 enum rtx_code comp_code = GET_CODE (comparison);
22784
22785 if (GET_MODE_CLASS (mode) != MODE_CC)
22786 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22787 XEXP (comparison, 1));
22788
22789 switch (mode)
22790 {
22791 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22792 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22793 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22794 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22795 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22796 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22797 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22798 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22799 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22800 case E_CC_DLTUmode: code = ARM_CC;
22801
22802 dominance:
22803 if (comp_code == EQ)
22804 return ARM_INVERSE_CONDITION_CODE (code);
22805 if (comp_code == NE)
22806 return code;
22807 return ARM_NV;
22808
22809 case E_CC_NOOVmode:
22810 switch (comp_code)
22811 {
22812 case NE: return ARM_NE;
22813 case EQ: return ARM_EQ;
22814 case GE: return ARM_PL;
22815 case LT: return ARM_MI;
22816 default: return ARM_NV;
22817 }
22818
22819 case E_CC_Zmode:
22820 switch (comp_code)
22821 {
22822 case NE: return ARM_NE;
22823 case EQ: return ARM_EQ;
22824 default: return ARM_NV;
22825 }
22826
22827 case E_CC_Nmode:
22828 switch (comp_code)
22829 {
22830 case NE: return ARM_MI;
22831 case EQ: return ARM_PL;
22832 default: return ARM_NV;
22833 }
22834
22835 case E_CCFPEmode:
22836 case E_CCFPmode:
22837 /* We can handle all cases except UNEQ and LTGT. */
22838 switch (comp_code)
22839 {
22840 case GE: return ARM_GE;
22841 case GT: return ARM_GT;
22842 case LE: return ARM_LS;
22843 case LT: return ARM_MI;
22844 case NE: return ARM_NE;
22845 case EQ: return ARM_EQ;
22846 case ORDERED: return ARM_VC;
22847 case UNORDERED: return ARM_VS;
22848 case UNLT: return ARM_LT;
22849 case UNLE: return ARM_LE;
22850 case UNGT: return ARM_HI;
22851 case UNGE: return ARM_PL;
22852 /* UNEQ and LTGT do not have a representation. */
22853 case UNEQ: /* Fall through. */
22854 case LTGT: /* Fall through. */
22855 default: return ARM_NV;
22856 }
22857
22858 case E_CC_SWPmode:
22859 switch (comp_code)
22860 {
22861 case NE: return ARM_NE;
22862 case EQ: return ARM_EQ;
22863 case GE: return ARM_LE;
22864 case GT: return ARM_LT;
22865 case LE: return ARM_GE;
22866 case LT: return ARM_GT;
22867 case GEU: return ARM_LS;
22868 case GTU: return ARM_CC;
22869 case LEU: return ARM_CS;
22870 case LTU: return ARM_HI;
22871 default: return ARM_NV;
22872 }
22873
22874 case E_CC_Cmode:
22875 switch (comp_code)
22876 {
22877 case LTU: return ARM_CS;
22878 case GEU: return ARM_CC;
22879 case NE: return ARM_CS;
22880 case EQ: return ARM_CC;
22881 default: return ARM_NV;
22882 }
22883
22884 case E_CC_CZmode:
22885 switch (comp_code)
22886 {
22887 case NE: return ARM_NE;
22888 case EQ: return ARM_EQ;
22889 case GEU: return ARM_CS;
22890 case GTU: return ARM_HI;
22891 case LEU: return ARM_LS;
22892 case LTU: return ARM_CC;
22893 default: return ARM_NV;
22894 }
22895
22896 case E_CC_NCVmode:
22897 switch (comp_code)
22898 {
22899 case GE: return ARM_GE;
22900 case LT: return ARM_LT;
22901 case GEU: return ARM_CS;
22902 case LTU: return ARM_CC;
22903 default: return ARM_NV;
22904 }
22905
22906 case E_CC_Vmode:
22907 switch (comp_code)
22908 {
22909 case NE: return ARM_VS;
22910 case EQ: return ARM_VC;
22911 default: return ARM_NV;
22912 }
22913
22914 case E_CCmode:
22915 switch (comp_code)
22916 {
22917 case NE: return ARM_NE;
22918 case EQ: return ARM_EQ;
22919 case GE: return ARM_GE;
22920 case GT: return ARM_GT;
22921 case LE: return ARM_LE;
22922 case LT: return ARM_LT;
22923 case GEU: return ARM_CS;
22924 case GTU: return ARM_HI;
22925 case LEU: return ARM_LS;
22926 case LTU: return ARM_CC;
22927 default: return ARM_NV;
22928 }
22929
22930 default: gcc_unreachable ();
22931 }
22932 }
22933
22934 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22935 static enum arm_cond_code
22936 get_arm_condition_code (rtx comparison)
22937 {
22938 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22939 gcc_assert (code != ARM_NV);
22940 return code;
22941 }
22942
22943 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22944 code registers when not targeting Thumb1. The VFP condition register
22945 only exists when generating hard-float code. */
22946 static bool
22947 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22948 {
22949 if (!TARGET_32BIT)
22950 return false;
22951
22952 *p1 = CC_REGNUM;
22953 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22954 return true;
22955 }
22956
22957 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22958 instructions. */
22959 void
22960 thumb2_final_prescan_insn (rtx_insn *insn)
22961 {
22962 rtx_insn *first_insn = insn;
22963 rtx body = PATTERN (insn);
22964 rtx predicate;
22965 enum arm_cond_code code;
22966 int n;
22967 int mask;
22968 int max;
22969
22970 /* max_insns_skipped in the tune was already taken into account in the
22971 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22972 stage just emit IT blocks as large as the hardware allows; it does not
22973 make sense to split them. */
22974 max = MAX_INSN_PER_IT_BLOCK;
22975
22976 /* Remove the previous insn from the count of insns to be output. */
22977 if (arm_condexec_count)
22978 arm_condexec_count--;
22979
22980 /* Nothing to do if we are already inside a conditional block. */
22981 if (arm_condexec_count)
22982 return;
22983
22984 if (GET_CODE (body) != COND_EXEC)
22985 return;
22986
22987 /* Conditional jumps are implemented directly. */
22988 if (JUMP_P (insn))
22989 return;
22990
22991 predicate = COND_EXEC_TEST (body);
22992 arm_current_cc = get_arm_condition_code (predicate);
22993
22994 n = get_attr_ce_count (insn);
22995 arm_condexec_count = 1;
22996 arm_condexec_mask = (1 << n) - 1;
22997 arm_condexec_masklen = n;
22998 /* See if subsequent instructions can be combined into the same block. */
22999 for (;;)
23000 {
23001 insn = next_nonnote_insn (insn);
23002
23003 /* Jumping into the middle of an IT block is illegal, so a label or
23004 barrier terminates the block. */
23005 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23006 break;
23007
23008 body = PATTERN (insn);
23009 /* USE and CLOBBER aren't really insns, so just skip them. */
23010 if (GET_CODE (body) == USE
23011 || GET_CODE (body) == CLOBBER)
23012 continue;
23013
23014 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23015 if (GET_CODE (body) != COND_EXEC)
23016 break;
23017 /* Maximum number of conditionally executed instructions in a block. */
23018 n = get_attr_ce_count (insn);
23019 if (arm_condexec_masklen + n > max)
23020 break;
23021
23022 predicate = COND_EXEC_TEST (body);
23023 code = get_arm_condition_code (predicate);
23024 mask = (1 << n) - 1;
23025 if (arm_current_cc == code)
23026 arm_condexec_mask |= (mask << arm_condexec_masklen);
23027 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23028 break;
23029
23030 arm_condexec_count++;
23031 arm_condexec_masklen += n;
23032
23033 /* A jump must be the last instruction in a conditional block. */
23034 if (JUMP_P (insn))
23035 break;
23036 }
23037 /* Restore recog_data (getting the attributes of other insns can
23038 destroy this array, but final.c assumes that it remains intact
23039 across this call). */
23040 extract_constrain_insn_cached (first_insn);
23041 }
23042
23043 void
23044 arm_final_prescan_insn (rtx_insn *insn)
23045 {
23046 /* BODY will hold the body of INSN. */
23047 rtx body = PATTERN (insn);
23048
23049 /* This will be 1 if we are trying to repeat the trick (state 3 below),
23050 and things need to be reversed if it appears to fail. */
23051 int reverse = 0;
23052
23053 /* If we start with a return insn, we only succeed if we find another one. */
23054 int seeking_return = 0;
23055 enum rtx_code return_code = UNKNOWN;
23056
23057 /* START_INSN will hold the insn from where we start looking. This is the
23058 first insn after the following code_label if REVERSE is true. */
23059 rtx_insn *start_insn = insn;
23060
23061 /* If in state 4, check if the target branch is reached, in order to
23062 change back to state 0. */
23063 if (arm_ccfsm_state == 4)
23064 {
23065 if (insn == arm_target_insn)
23066 {
23067 arm_target_insn = NULL;
23068 arm_ccfsm_state = 0;
23069 }
23070 return;
23071 }
23072
23073 /* If in state 3, it is possible to repeat the trick, if this insn is an
23074 unconditional branch to a label, and immediately following this branch
23075 is the previous target label which is only used once, and the label this
23076 branch jumps to is not too far off. */
23077 if (arm_ccfsm_state == 3)
23078 {
23079 if (simplejump_p (insn))
23080 {
23081 start_insn = next_nonnote_insn (start_insn);
23082 if (BARRIER_P (start_insn))
23083 {
23084 /* XXX Isn't this always a barrier? */
23085 start_insn = next_nonnote_insn (start_insn);
23086 }
23087 if (LABEL_P (start_insn)
23088 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23089 && LABEL_NUSES (start_insn) == 1)
23090 reverse = TRUE;
23091 else
23092 return;
23093 }
23094 else if (ANY_RETURN_P (body))
23095 {
23096 start_insn = next_nonnote_insn (start_insn);
23097 if (BARRIER_P (start_insn))
23098 start_insn = next_nonnote_insn (start_insn);
23099 if (LABEL_P (start_insn)
23100 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23101 && LABEL_NUSES (start_insn) == 1)
23102 {
23103 reverse = TRUE;
23104 seeking_return = 1;
23105 return_code = GET_CODE (body);
23106 }
23107 else
23108 return;
23109 }
23110 else
23111 return;
23112 }
23113
23114 gcc_assert (!arm_ccfsm_state || reverse);
23115 if (!JUMP_P (insn))
23116 return;
23117
23118 /* This jump might be paralleled with a clobber of the condition codes;
23119 the jump should always come first. */
23120 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23121 body = XVECEXP (body, 0, 0);
23122
23123 if (reverse
23124 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23125 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23126 {
23127 int insns_skipped;
23128 int fail = FALSE, succeed = FALSE;
23129 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23130 int then_not_else = TRUE;
23131 rtx_insn *this_insn = start_insn;
23132 rtx label = 0;
23133
23134 /* Register the insn jumped to. */
23135 if (reverse)
23136 {
23137 if (!seeking_return)
23138 label = XEXP (SET_SRC (body), 0);
23139 }
23140 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23141 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23142 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23143 {
23144 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23145 then_not_else = FALSE;
23146 }
23147 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23148 {
23149 seeking_return = 1;
23150 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23151 }
23152 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23153 {
23154 seeking_return = 1;
23155 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23156 then_not_else = FALSE;
23157 }
23158 else
23159 gcc_unreachable ();
23160
23161 /* See how many insns this branch skips, and what kind of insns. If all
23162 insns are okay, and the label or unconditional branch to the same
23163 label is not too far away, succeed. */
23164 for (insns_skipped = 0;
23165 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23166 {
23167 rtx scanbody;
23168
23169 this_insn = next_nonnote_insn (this_insn);
23170 if (!this_insn)
23171 break;
23172
23173 switch (GET_CODE (this_insn))
23174 {
23175 case CODE_LABEL:
23176 /* Succeed if it is the target label, otherwise fail since
23177 control falls in from somewhere else. */
23178 if (this_insn == label)
23179 {
23180 arm_ccfsm_state = 1;
23181 succeed = TRUE;
23182 }
23183 else
23184 fail = TRUE;
23185 break;
23186
23187 case BARRIER:
23188 /* Succeed if the following insn is the target label.
23189 Otherwise fail.
23190 If return insns are used then the last insn in a function
23191 will be a barrier. */
23192 this_insn = next_nonnote_insn (this_insn);
23193 if (this_insn && this_insn == label)
23194 {
23195 arm_ccfsm_state = 1;
23196 succeed = TRUE;
23197 }
23198 else
23199 fail = TRUE;
23200 break;
23201
23202 case CALL_INSN:
23203 /* The AAPCS says that conditional calls should not be
23204 used since they make interworking inefficient (the
23205 linker can't transform BL<cond> into BLX). That's
23206 only a problem if the machine has BLX. */
23207 if (arm_arch5)
23208 {
23209 fail = TRUE;
23210 break;
23211 }
23212
23213 /* Succeed if the following insn is the target label, or
23214 if the following two insns are a barrier and the
23215 target label. */
23216 this_insn = next_nonnote_insn (this_insn);
23217 if (this_insn && BARRIER_P (this_insn))
23218 this_insn = next_nonnote_insn (this_insn);
23219
23220 if (this_insn && this_insn == label
23221 && insns_skipped < max_insns_skipped)
23222 {
23223 arm_ccfsm_state = 1;
23224 succeed = TRUE;
23225 }
23226 else
23227 fail = TRUE;
23228 break;
23229
23230 case JUMP_INSN:
23231 /* If this is an unconditional branch to the same label, succeed.
23232 If it is to another label, do nothing. If it is conditional,
23233 fail. */
23234 /* XXX Probably, the tests for SET and the PC are
23235 unnecessary. */
23236
23237 scanbody = PATTERN (this_insn);
23238 if (GET_CODE (scanbody) == SET
23239 && GET_CODE (SET_DEST (scanbody)) == PC)
23240 {
23241 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23242 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23243 {
23244 arm_ccfsm_state = 2;
23245 succeed = TRUE;
23246 }
23247 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23248 fail = TRUE;
23249 }
23250 /* Fail if a conditional return is undesirable (e.g. on a
23251 StrongARM), but still allow this if optimizing for size. */
23252 else if (GET_CODE (scanbody) == return_code
23253 && !use_return_insn (TRUE, NULL)
23254 && !optimize_size)
23255 fail = TRUE;
23256 else if (GET_CODE (scanbody) == return_code)
23257 {
23258 arm_ccfsm_state = 2;
23259 succeed = TRUE;
23260 }
23261 else if (GET_CODE (scanbody) == PARALLEL)
23262 {
23263 switch (get_attr_conds (this_insn))
23264 {
23265 case CONDS_NOCOND:
23266 break;
23267 default:
23268 fail = TRUE;
23269 break;
23270 }
23271 }
23272 else
23273 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23274
23275 break;
23276
23277 case INSN:
23278 /* Instructions using or affecting the condition codes make it
23279 fail. */
23280 scanbody = PATTERN (this_insn);
23281 if (!(GET_CODE (scanbody) == SET
23282 || GET_CODE (scanbody) == PARALLEL)
23283 || get_attr_conds (this_insn) != CONDS_NOCOND)
23284 fail = TRUE;
23285 break;
23286
23287 default:
23288 break;
23289 }
23290 }
23291 if (succeed)
23292 {
23293 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23294 arm_target_label = CODE_LABEL_NUMBER (label);
23295 else
23296 {
23297 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23298
23299 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23300 {
23301 this_insn = next_nonnote_insn (this_insn);
23302 gcc_assert (!this_insn
23303 || (!BARRIER_P (this_insn)
23304 && !LABEL_P (this_insn)));
23305 }
23306 if (!this_insn)
23307 {
23308 /* Oh, dear!  We ran off the end... give up. */
23309 extract_constrain_insn_cached (insn);
23310 arm_ccfsm_state = 0;
23311 arm_target_insn = NULL;
23312 return;
23313 }
23314 arm_target_insn = this_insn;
23315 }
23316
23317 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23318 what it was. */
23319 if (!reverse)
23320 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23321
23322 if (reverse || then_not_else)
23323 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23324 }
23325
23326 /* Restore recog_data (getting the attributes of other insns can
23327 destroy this array, but final.c assumes that it remains intact
23328 across this call). */
23329 extract_constrain_insn_cached (insn);
23330 }
23331 }
23332
23333 /* Output IT instructions. */
23334 void
23335 thumb2_asm_output_opcode (FILE * stream)
23336 {
23337 char buff[5];
23338 int n;
23339
23340 if (arm_condexec_mask)
23341 {
23342 for (n = 0; n < arm_condexec_masklen; n++)
23343 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23344 buff[n] = 0;
23345 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23346 arm_condition_codes[arm_current_cc]);
23347 arm_condexec_mask = 0;
23348 }
23349 }
23350
23351 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23352 static bool
23353 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23354 {
23355 if (GET_MODE_CLASS (mode) == MODE_CC)
23356 return (regno == CC_REGNUM
23357 || (TARGET_HARD_FLOAT
23358 && regno == VFPCC_REGNUM));
23359
23360 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23361 return false;
23362
23363 if (TARGET_THUMB1)
23364 /* For the Thumb we only allow values bigger than SImode in
23365 registers 0 - 6, so that there is always a second low
23366 register available to hold the upper part of the value.
23367 We probably ought to ensure that the register is the
23368 start of an even numbered register pair. */
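/* For example (illustrative), a DImode value may start in r6 (using the
   r6/r7 pair) but not in r7, which would spill into the high register r8.  */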
23369 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23370
23371 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23372 {
23373 if (mode == SFmode || mode == SImode)
23374 return VFP_REGNO_OK_FOR_SINGLE (regno);
23375
23376 if (mode == DFmode)
23377 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23378
23379 if (mode == HFmode)
23380 return VFP_REGNO_OK_FOR_SINGLE (regno);
23381
23382 /* VFP registers can hold HImode values. */
23383 if (mode == HImode)
23384 return VFP_REGNO_OK_FOR_SINGLE (regno);
23385
23386 if (TARGET_NEON)
23387 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23388 || (VALID_NEON_QREG_MODE (mode)
23389 && NEON_REGNO_OK_FOR_QUAD (regno))
23390 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23391 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23392 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23393 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23394 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23395
23396 return false;
23397 }
23398
23399 if (TARGET_REALLY_IWMMXT)
23400 {
23401 if (IS_IWMMXT_GR_REGNUM (regno))
23402 return mode == SImode;
23403
23404 if (IS_IWMMXT_REGNUM (regno))
23405 return VALID_IWMMXT_REG_MODE (mode);
23406 }
23407
23408 /* We allow almost any value to be stored in the general registers.
23409 Restrict doubleword quantities to even register pairs in ARM state
23410 so that we can use ldrd. Do not allow very large Neon structure
23411 opaque modes in general registers; they would use too many. */
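/* For instance (illustrative): when LDRD is available in ARM state, a
   DImode value may start at an even register such as r4 (pair {r4, r5})
   but not at an odd one such as r5, which the test below rejects.  */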
23412 if (regno <= LAST_ARM_REGNUM)
23413 {
23414 if (ARM_NUM_REGS (mode) > 4)
23415 return false;
23416
23417 if (TARGET_THUMB2)
23418 return true;
23419
23420 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23421 }
23422
23423 if (regno == FRAME_POINTER_REGNUM
23424 || regno == ARG_POINTER_REGNUM)
23425 /* We only allow integers in the fake hard registers. */
23426 return GET_MODE_CLASS (mode) == MODE_INT;
23427
23428 return false;
23429 }
23430
23431 /* Implement TARGET_MODES_TIEABLE_P. */
23432
23433 static bool
23434 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23435 {
23436 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23437 return true;
23438
23439 /* We specifically want to allow elements of "structure" modes to
23440 be tieable to the structure. This more general condition allows
23441 other rarer situations too. */
23442 if (TARGET_NEON
23443 && (VALID_NEON_DREG_MODE (mode1)
23444 || VALID_NEON_QREG_MODE (mode1)
23445 || VALID_NEON_STRUCT_MODE (mode1))
23446 && (VALID_NEON_DREG_MODE (mode2)
23447 || VALID_NEON_QREG_MODE (mode2)
23448 || VALID_NEON_STRUCT_MODE (mode2)))
23449 return true;
23450
23451 return false;
23452 }
23453
23454 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
23455 not used in ARM mode. */
23456
23457 enum reg_class
23458 arm_regno_class (int regno)
23459 {
23460 if (regno == PC_REGNUM)
23461 return NO_REGS;
23462
23463 if (TARGET_THUMB1)
23464 {
23465 if (regno == STACK_POINTER_REGNUM)
23466 return STACK_REG;
23467 if (regno == CC_REGNUM)
23468 return CC_REG;
23469 if (regno < 8)
23470 return LO_REGS;
23471 return HI_REGS;
23472 }
23473
23474 if (TARGET_THUMB2 && regno < 8)
23475 return LO_REGS;
23476
23477 if ( regno <= LAST_ARM_REGNUM
23478 || regno == FRAME_POINTER_REGNUM
23479 || regno == ARG_POINTER_REGNUM)
23480 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23481
23482 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23483 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23484
23485 if (IS_VFP_REGNUM (regno))
23486 {
23487 if (regno <= D7_VFP_REGNUM)
23488 return VFP_D0_D7_REGS;
23489 else if (regno <= LAST_LO_VFP_REGNUM)
23490 return VFP_LO_REGS;
23491 else
23492 return VFP_HI_REGS;
23493 }
23494
23495 if (IS_IWMMXT_REGNUM (regno))
23496 return IWMMXT_REGS;
23497
23498 if (IS_IWMMXT_GR_REGNUM (regno))
23499 return IWMMXT_GR_REGS;
23500
23501 return NO_REGS;
23502 }
23503
23504 /* Handle a special case when computing the offset
23505 of an argument from the frame pointer. */
23506 int
23507 arm_debugger_arg_offset (int value, rtx addr)
23508 {
23509 rtx_insn *insn;
23510
23511 /* We are only interested if dbxout_parms() failed to compute the offset. */
23512 if (value != 0)
23513 return 0;
23514
23515 /* We can only cope with the case where the address is held in a register. */
23516 if (!REG_P (addr))
23517 return 0;
23518
23519 /* If we are using the frame pointer to point at the argument, then
23520 an offset of 0 is correct. */
23521 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23522 return 0;
23523
23524 /* If we are using the stack pointer to point at the
23525 argument, then an offset of 0 is correct. */
23526 /* ??? Check this is consistent with thumb2 frame layout. */
23527 if ((TARGET_THUMB || !frame_pointer_needed)
23528 && REGNO (addr) == SP_REGNUM)
23529 return 0;
23530
23531 /* Oh dear. The argument is pointed to by a register rather
23532 than being held in a register, or being stored at a known
23533 offset from the frame pointer. Since GDB only understands
23534 those two kinds of argument we must translate the address
23535 held in the register into an offset from the frame pointer.
23536 We do this by searching through the insns for the function
23537 looking to see where this register gets its value. If the
23538 register is initialized from the frame pointer plus an offset
23539 then we are in luck and we can continue, otherwise we give up.
23540
23541 This code is exercised by producing debugging information
23542 for a function with arguments like this:
23543
23544 double func (double a, double b, int c, double d) {return d;}
23545
23546 Without this code the stab for parameter 'd' will be set to
23547 an offset of 0 from the frame pointer, rather than 8. */
23548
23549 /* The if() statement says:
23550
23551 If the insn is a normal instruction
23552 and if the insn is setting the value in a register
23553 and if the register being set is the register holding the address of the argument
23554 and if the address is computed by an addition
23555 that involves adding to a register
23556 which is the frame pointer
23557 a constant integer
23558
23559 then... */
23560
23561 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23562 {
23563 if ( NONJUMP_INSN_P (insn)
23564 && GET_CODE (PATTERN (insn)) == SET
23565 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23566 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23567 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23568 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23569 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23570 )
23571 {
23572 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23573
23574 break;
23575 }
23576 }
23577
23578 if (value == 0)
23579 {
23580 debug_rtx (addr);
23581 warning (0, "unable to compute real location of stacked parameter");
23582 value = 8; /* XXX magic hack */
23583 }
23584
23585 return value;
23586 }
23587 \f
23588 /* Implement TARGET_PROMOTED_TYPE. */
23589
23590 static tree
23591 arm_promoted_type (const_tree t)
23592 {
23593 if (SCALAR_FLOAT_TYPE_P (t)
23594 && TYPE_PRECISION (t) == 16
23595 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23596 return float_type_node;
23597 return NULL_TREE;
23598 }
23599
23600 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23601 This simply adds HFmode as a supported mode; even though we don't
23602 implement arithmetic on this type directly, it's supported by
23603 optabs conversions, much the way the double-word arithmetic is
23604 special-cased in the default hook. */
23605
23606 static bool
23607 arm_scalar_mode_supported_p (scalar_mode mode)
23608 {
23609 if (mode == HFmode)
23610 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23611 else if (ALL_FIXED_POINT_MODE_P (mode))
23612 return true;
23613 else
23614 return default_scalar_mode_supported_p (mode);
23615 }
23616
23617 /* Set the value of FLT_EVAL_METHOD.
23618 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23619
23620 0: evaluate all operations and constants, whose semantic type has at
23621 most the range and precision of type float, to the range and
23622 precision of float; evaluate all other operations and constants to
23623 the range and precision of the semantic type;
23624
23625 N, where _FloatN is a supported interchange floating type:
23626 evaluate all operations and constants, whose semantic type has at
23627 most the range and precision of _FloatN type, to the range and
23628 precision of the _FloatN type; evaluate all other operations and
23629 constants to the range and precision of the semantic type;
23630
23631 If we have the ARMv8.2-A extensions then we support _Float16 in native
23632 precision, so we should set this to 16. Otherwise, we support the type,
23633 but want to evaluate expressions in float precision, so set this to
23634 0. */
23635
23636 static enum flt_eval_method
23637 arm_excess_precision (enum excess_precision_type type)
23638 {
23639 switch (type)
23640 {
23641 case EXCESS_PRECISION_TYPE_FAST:
23642 case EXCESS_PRECISION_TYPE_STANDARD:
23643 /* We can calculate either in 16-bit range and precision or
23644 32-bit range and precision. Make that decision based on whether
23645 we have native support for the ARMv8.2-A 16-bit floating-point
23646 instructions or not. */
23647 return (TARGET_VFP_FP16INST
23648 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23649 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23650 case EXCESS_PRECISION_TYPE_IMPLICIT:
23651 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23652 default:
23653 gcc_unreachable ();
23654 }
23655 return FLT_EVAL_METHOD_UNPREDICTABLE;
23656 }
23657
23658
23659 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23660 _Float16 if we are using anything other than ieee format for 16-bit
23661 floating point. Otherwise, punt to the default implementation. */
23662 static opt_scalar_float_mode
23663 arm_floatn_mode (int n, bool extended)
23664 {
23665 if (!extended && n == 16)
23666 {
23667 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23668 return HFmode;
23669 return opt_scalar_float_mode ();
23670 }
23671
23672 return default_floatn_mode (n, extended);
23673 }
23674
23675
23676 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23677 not to early-clobber SRC registers in the process.
23678
23679 We assume that the operands described by SRC and DEST represent a
23680 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23681 number of components into which the copy has been decomposed. */
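/* Illustrative sketch: if DEST overlaps SRC and starts at a higher
   register number, the copies below are emitted in reverse order, so that
   e.g. copying {d0, d1} into {d1, d2} moves d1 into d2 before d0
   overwrites d1.  */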
23682 void
23683 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23684 {
23685 unsigned int i;
23686
23687 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23688 || REGNO (operands[0]) < REGNO (operands[1]))
23689 {
23690 for (i = 0; i < count; i++)
23691 {
23692 operands[2 * i] = dest[i];
23693 operands[2 * i + 1] = src[i];
23694 }
23695 }
23696 else
23697 {
23698 for (i = 0; i < count; i++)
23699 {
23700 operands[2 * i] = dest[count - i - 1];
23701 operands[2 * i + 1] = src[count - i - 1];
23702 }
23703 }
23704 }
23705
23706 /* Split the vcombine of op[1] and op[2] into separate moves into the low and high halves of op[0]. */
23707
23708 void
23709 neon_split_vcombine (rtx operands[3])
23710 {
23711 unsigned int dest = REGNO (operands[0]);
23712 unsigned int src1 = REGNO (operands[1]);
23713 unsigned int src2 = REGNO (operands[2]);
23714 machine_mode halfmode = GET_MODE (operands[1]);
23715 unsigned int halfregs = REG_NREGS (operands[1]);
23716 rtx destlo, desthi;
23717
23718 if (src1 == dest && src2 == dest + halfregs)
23719 {
23720 /* No-op move. Can't split to nothing; emit something. */
23721 emit_note (NOTE_INSN_DELETED);
23722 return;
23723 }
23724
23725 /* Preserve register attributes for variable tracking. */
23726 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23727 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23728 GET_MODE_SIZE (halfmode));
23729
23730 /* Special case of reversed high/low parts. Use VSWP. */
23731 if (src2 == dest && src1 == dest + halfregs)
23732 {
23733 rtx x = gen_rtx_SET (destlo, operands[1]);
23734 rtx y = gen_rtx_SET (desthi, operands[2]);
23735 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23736 return;
23737 }
23738
23739 if (!reg_overlap_mentioned_p (operands[2], destlo))
23740 {
23741 /* Try to avoid unnecessary moves if part of the result
23742 is in the right place already. */
23743 if (src1 != dest)
23744 emit_move_insn (destlo, operands[1]);
23745 if (src2 != dest + halfregs)
23746 emit_move_insn (desthi, operands[2]);
23747 }
23748 else
23749 {
23750 if (src2 != dest + halfregs)
23751 emit_move_insn (desthi, operands[2]);
23752 if (src1 != dest)
23753 emit_move_insn (destlo, operands[1]);
23754 }
23755 }
23756 \f
23757 /* Return the number (counting from 0) of
23758 the least significant set bit in MASK. */
23759
23760 inline static int
23761 number_of_first_bit_set (unsigned mask)
23762 {
23763 return ctz_hwi (mask);
23764 }
23765
23766 /* Like emit_multi_reg_push, but allowing for a different set of
23767 registers to be described as saved. MASK is the set of registers
23768 to be saved; REAL_REGS is the set of registers to be described as
23769 saved. If REAL_REGS is 0, only describe the stack adjustment. */
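/* For example (hypothetical register choice): a prologue that saves high
   registers through low scratch registers might emit "push {r6, r7}"
   (MASK) while the unwind information records r8 and r9 (REAL_REGS) as the
   registers actually being saved.  */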
23770
23771 static rtx_insn *
23772 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23773 {
23774 unsigned long regno;
23775 rtx par[10], tmp, reg;
23776 rtx_insn *insn;
23777 int i, j;
23778
23779 /* Build the parallel of the registers actually being stored. */
23780 for (i = 0; mask; ++i, mask &= mask - 1)
23781 {
23782 regno = ctz_hwi (mask);
23783 reg = gen_rtx_REG (SImode, regno);
23784
23785 if (i == 0)
23786 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23787 else
23788 tmp = gen_rtx_USE (VOIDmode, reg);
23789
23790 par[i] = tmp;
23791 }
23792
23793 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23794 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23795 tmp = gen_frame_mem (BLKmode, tmp);
23796 tmp = gen_rtx_SET (tmp, par[0]);
23797 par[0] = tmp;
23798
23799 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23800 insn = emit_insn (tmp);
23801
23802 /* Always build the stack adjustment note for unwind info. */
23803 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23804 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23805 par[0] = tmp;
23806
23807 /* Build the parallel of the registers recorded as saved for unwind. */
23808 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23809 {
23810 regno = ctz_hwi (real_regs);
23811 reg = gen_rtx_REG (SImode, regno);
23812
23813 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23814 tmp = gen_frame_mem (SImode, tmp);
23815 tmp = gen_rtx_SET (tmp, reg);
23816 RTX_FRAME_RELATED_P (tmp) = 1;
23817 par[j + 1] = tmp;
23818 }
23819
23820 if (j == 0)
23821 tmp = par[0];
23822 else
23823 {
23824 RTX_FRAME_RELATED_P (par[0]) = 1;
23825 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23826 }
23827
23828 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23829
23830 return insn;
23831 }
23832
23833 /* Emit assembly to pop registers from the stack. F is the
23834 assembly file. MASK is the registers to pop. */
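/* A sketch of the usual output: MASK = (1 << 4) | (1 << PC_REGNUM) would
   normally produce "pop {r4, pc}", unless interworking, backtracing, EH
   returns or a CMSE entry function force the PC to be popped via
   thumb_exit instead.  */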
23835 static void
23836 thumb_pop (FILE *f, unsigned long mask)
23837 {
23838 int regno;
23839 int lo_mask = mask & 0xFF;
23840
23841 gcc_assert (mask);
23842
23843 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23844 {
23845 /* Special case. Do not generate a POP PC statement here, do it in
23846 thumb_exit (). */
23847 thumb_exit (f, -1);
23848 return;
23849 }
23850
23851 fprintf (f, "\tpop\t{");
23852
23853 /* Look at the low registers first. */
23854 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23855 {
23856 if (lo_mask & 1)
23857 {
23858 asm_fprintf (f, "%r", regno);
23859
23860 if ((lo_mask & ~1) != 0)
23861 fprintf (f, ", ");
23862 }
23863 }
23864
23865 if (mask & (1 << PC_REGNUM))
23866 {
23867 /* Catch popping the PC. */
23868 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23869 || IS_CMSE_ENTRY (arm_current_func_type ()))
23870 {
23871 /* The PC is never popped directly; instead
23872 it is popped into r3 and then BX is used. */
23873 fprintf (f, "}\n");
23874
23875 thumb_exit (f, -1);
23876
23877 return;
23878 }
23879 else
23880 {
23881 if (mask & 0xFF)
23882 fprintf (f, ", ");
23883
23884 asm_fprintf (f, "%r", PC_REGNUM);
23885 }
23886 }
23887
23888 fprintf (f, "}\n");
23889 }
23890
23891 /* Generate code to return from a thumb function.
23892 If 'reg_containing_return_addr' is -1, then the return address is
23893 actually on the stack, at the stack pointer. */
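/* Rough summary of the code below: anything still to be popped (the
   return address, plus FP and SP when TARGET_BACKTRACE) is popped into
   whichever argument registers are free, moved into its final home, and
   the function then returns with BX (or BXNS for CMSE entry points).  */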
23894 static void
23895 thumb_exit (FILE *f, int reg_containing_return_addr)
23896 {
23897 unsigned regs_available_for_popping;
23898 unsigned regs_to_pop;
23899 int pops_needed;
23900 unsigned available;
23901 unsigned required;
23902 machine_mode mode;
23903 int size;
23904 int restore_a4 = FALSE;
23905
23906 /* Compute the registers we need to pop. */
23907 regs_to_pop = 0;
23908 pops_needed = 0;
23909
23910 if (reg_containing_return_addr == -1)
23911 {
23912 regs_to_pop |= 1 << LR_REGNUM;
23913 ++pops_needed;
23914 }
23915
23916 if (TARGET_BACKTRACE)
23917 {
23918 /* Restore the (ARM) frame pointer and stack pointer. */
23919 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23920 pops_needed += 2;
23921 }
23922
23923 /* If there is nothing to pop then just emit the BX instruction and
23924 return. */
23925 if (pops_needed == 0)
23926 {
23927 if (crtl->calls_eh_return)
23928 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23929
23930 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23931 {
23932 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23933 reg_containing_return_addr);
23934 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23935 }
23936 else
23937 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23938 return;
23939 }
23940 /* Otherwise if we are not supporting interworking and we have not created
23941 a backtrace structure and the function was not entered in ARM mode then
23942 just pop the return address straight into the PC. */
23943 else if (!TARGET_INTERWORK
23944 && !TARGET_BACKTRACE
23945 && !is_called_in_ARM_mode (current_function_decl)
23946 && !crtl->calls_eh_return
23947 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23948 {
23949 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23950 return;
23951 }
23952
23953 /* Find out how many of the (return) argument registers we can corrupt. */
23954 regs_available_for_popping = 0;
23955
23956 /* If returning via __builtin_eh_return, the bottom three registers
23957 all contain information needed for the return. */
23958 if (crtl->calls_eh_return)
23959 size = 12;
23960 else
23961 {
23962 /* We can deduce the registers used from the function's
23963 return value. This is more reliable than examining
23964 df_regs_ever_live_p () because that will be set if the register is
23965 ever used in the function, not just if the register is used
23966 to hold a return value. */
23967
23968 if (crtl->return_rtx != 0)
23969 mode = GET_MODE (crtl->return_rtx);
23970 else
23971 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23972
23973 size = GET_MODE_SIZE (mode);
23974
23975 if (size == 0)
23976 {
23977 /* In a void function we can use any argument register.
23978 In a function that returns a structure on the stack
23979 we can use the second and third argument registers. */
23980 if (mode == VOIDmode)
23981 regs_available_for_popping =
23982 (1 << ARG_REGISTER (1))
23983 | (1 << ARG_REGISTER (2))
23984 | (1 << ARG_REGISTER (3));
23985 else
23986 regs_available_for_popping =
23987 (1 << ARG_REGISTER (2))
23988 | (1 << ARG_REGISTER (3));
23989 }
23990 else if (size <= 4)
23991 regs_available_for_popping =
23992 (1 << ARG_REGISTER (2))
23993 | (1 << ARG_REGISTER (3));
23994 else if (size <= 8)
23995 regs_available_for_popping =
23996 (1 << ARG_REGISTER (3));
23997 }
23998
23999 /* Match registers to be popped with registers into which we pop them. */
24000 for (available = regs_available_for_popping,
24001 required = regs_to_pop;
24002 required != 0 && available != 0;
24003 available &= ~(available & - available),
24004 required &= ~(required & - required))
24005 -- pops_needed;
24006
24007 /* If we have any popping registers left over, remove them. */
24008 if (available > 0)
24009 regs_available_for_popping &= ~available;
24010
24011 /* Otherwise if we need another popping register we can use
24012 the fourth argument register. */
24013 else if (pops_needed)
24014 {
24015 /* If we have not found any free argument registers and
24016 reg a4 contains the return address, we must move it. */
24017 if (regs_available_for_popping == 0
24018 && reg_containing_return_addr == LAST_ARG_REGNUM)
24019 {
24020 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24021 reg_containing_return_addr = LR_REGNUM;
24022 }
24023 else if (size > 12)
24024 {
24025 /* Register a4 is being used to hold part of the return value,
24026 but we have dire need of a free, low register. */
24027 restore_a4 = TRUE;
24028
24029 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24030 }
24031
24032 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24033 {
24034 /* The fourth argument register is available. */
24035 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24036
24037 --pops_needed;
24038 }
24039 }
24040
24041 /* Pop as many registers as we can. */
24042 thumb_pop (f, regs_available_for_popping);
24043
24044 /* Process the registers we popped. */
24045 if (reg_containing_return_addr == -1)
24046 {
24047 /* The return address was popped into the lowest numbered register. */
24048 regs_to_pop &= ~(1 << LR_REGNUM);
24049
24050 reg_containing_return_addr =
24051 number_of_first_bit_set (regs_available_for_popping);
24052
24053 /* Remove this register from the mask of available registers, so that
24054 the return address will not be corrupted by further pops. */
24055 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24056 }
24057
24058 /* If we popped other registers then handle them here. */
24059 if (regs_available_for_popping)
24060 {
24061 int frame_pointer;
24062
24063 /* Work out which register currently contains the frame pointer. */
24064 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24065
24066 /* Move it into the correct place. */
24067 asm_fprintf (f, "\tmov\t%r, %r\n",
24068 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24069
24070 /* (Temporarily) remove it from the mask of popped registers. */
24071 regs_available_for_popping &= ~(1 << frame_pointer);
24072 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24073
24074 if (regs_available_for_popping)
24075 {
24076 int stack_pointer;
24077
24078 /* We popped the stack pointer as well,
24079 find the register that contains it. */
24080 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24081
24082 /* Move it into the stack register. */
24083 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24084
24085 /* At this point we have popped all necessary registers, so
24086 do not worry about restoring regs_available_for_popping
24087 to its correct value:
24088
24089 assert (pops_needed == 0)
24090 assert (regs_available_for_popping == (1 << frame_pointer))
24091 assert (regs_to_pop == (1 << STACK_POINTER)) */
24092 }
24093 else
24094 {
24095 /* Since we have just moved the popped value into the frame
24096 pointer, the popping register is available for reuse, and
24097 we know that we still have the stack pointer left to pop. */
24098 regs_available_for_popping |= (1 << frame_pointer);
24099 }
24100 }
24101
24102 /* If we still have registers left on the stack, but we no longer have
24103 any registers into which we can pop them, then we must move the return
24104 address into the link register and make available the register that
24105 contained it. */
24106 if (regs_available_for_popping == 0 && pops_needed > 0)
24107 {
24108 regs_available_for_popping |= 1 << reg_containing_return_addr;
24109
24110 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24111 reg_containing_return_addr);
24112
24113 reg_containing_return_addr = LR_REGNUM;
24114 }
24115
24116 /* If we have registers left on the stack then pop some more.
24117 We know that at most we will want to pop FP and SP. */
24118 if (pops_needed > 0)
24119 {
24120 int popped_into;
24121 int move_to;
24122
24123 thumb_pop (f, regs_available_for_popping);
24124
24125 /* We have popped either FP or SP.
24126 Move whichever one it is into the correct register. */
24127 popped_into = number_of_first_bit_set (regs_available_for_popping);
24128 move_to = number_of_first_bit_set (regs_to_pop);
24129
24130 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24131 --pops_needed;
24132 }
24133
24134 /* If we still have not popped everything then we must have only
24135 had one register available to us and we are now popping the SP. */
24136 if (pops_needed > 0)
24137 {
24138 int popped_into;
24139
24140 thumb_pop (f, regs_available_for_popping);
24141
24142 popped_into = number_of_first_bit_set (regs_available_for_popping);
24143
24144 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24145 /*
24146 assert (regs_to_pop == (1 << STACK_POINTER))
24147 assert (pops_needed == 1)
24148 */
24149 }
24150
24151 /* If necessary restore the a4 register. */
24152 if (restore_a4)
24153 {
24154 if (reg_containing_return_addr != LR_REGNUM)
24155 {
24156 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24157 reg_containing_return_addr = LR_REGNUM;
24158 }
24159
24160 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24161 }
24162
24163 if (crtl->calls_eh_return)
24164 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24165
24166 /* Return to caller. */
24167 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24168 {
24169 /* This is for the cases where LR is not being used to contain the return
24170 address. It may therefore contain information that we might not want
24171 to leak, hence it must be cleared. The value in R0 will never be a
24172 secret at this point, so it is safe to use it, see the clearing code
24173 in 'cmse_nonsecure_entry_clear_before_return'. */
24174 if (reg_containing_return_addr != LR_REGNUM)
24175 asm_fprintf (f, "\tmov\tlr, r0\n");
24176
24177 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24178 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24179 }
24180 else
24181 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24182 }
24183 \f
24184 /* Scan INSN just before assembler is output for it.
24185 For Thumb-1, we track the status of the condition codes; this
24186 information is used in the cbranchsi4_insn pattern. */
24187 void
24188 thumb1_final_prescan_insn (rtx_insn *insn)
24189 {
24190 if (flag_print_asm_name)
24191 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24192 INSN_ADDRESSES (INSN_UID (insn)));
24193 /* Don't overwrite the previous setter when we get to a cbranch. */
24194 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24195 {
24196 enum attr_conds conds;
24197
24198 if (cfun->machine->thumb1_cc_insn)
24199 {
24200 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24201 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24202 CC_STATUS_INIT;
24203 }
24204 conds = get_attr_conds (insn);
24205 if (conds == CONDS_SET)
24206 {
24207 rtx set = single_set (insn);
24208 cfun->machine->thumb1_cc_insn = insn;
24209 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24210 cfun->machine->thumb1_cc_op1 = const0_rtx;
24211 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24212 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24213 {
24214 rtx src1 = XEXP (SET_SRC (set), 1);
24215 if (src1 == const0_rtx)
24216 cfun->machine->thumb1_cc_mode = CCmode;
24217 }
24218 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24219 {
24220 /* Record the src register operand instead of dest because
24221 cprop_hardreg pass propagates src. */
24222 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24223 }
24224 }
24225 else if (conds != CONDS_NOCOND)
24226 cfun->machine->thumb1_cc_insn = NULL_RTX;
24227 }
24228
24229 /* Check if an unexpected far jump is used. */
24230 if (cfun->machine->lr_save_eliminated
24231 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24232 internal_error ("Unexpected thumb1 far jump");
24233 }
24234
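/* Return nonzero if VAL is an 8-bit constant shifted left by 0-24 bits
   (presumably so that it can be built from a MOV of an 8-bit immediate
   followed by a shift).  For example, 0x1fe00 (0xff << 9) qualifies,
   while 0x101 does not.  */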
24235 int
24236 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24237 {
24238 unsigned HOST_WIDE_INT mask = 0xff;
24239 int i;
24240
24241 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24242 if (val == 0) /* XXX */
24243 return 0;
24244
24245 for (i = 0; i < 25; i++)
24246 if ((val & (mask << i)) == val)
24247 return 1;
24248
24249 return 0;
24250 }
24251
24252 /* Returns nonzero if the current function contains,
24253 or might contain a far jump. */
24254 static int
24255 thumb_far_jump_used_p (void)
24256 {
24257 rtx_insn *insn;
24258 bool far_jump = false;
24259 unsigned int func_size = 0;
24260
24261 /* If we have already decided that far jumps may be used,
24262 do not bother checking again, and always return true even if
24263 it turns out that they are not being used. Once we have made
24264 the decision that far jumps are present (and that hence the link
24265 register will be pushed onto the stack) we cannot go back on it. */
24266 if (cfun->machine->far_jump_used)
24267 return 1;
24268
24269 /* If this function is not being called from the prologue/epilogue
24270 generation code then it must be being called from the
24271 INITIAL_ELIMINATION_OFFSET macro. */
24272 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24273 {
24274 /* In this case we know that we are being asked about the elimination
24275 of the arg pointer register. If that register is not being used,
24276 then there are no arguments on the stack, and we do not have to
24277 worry that a far jump might force the prologue to push the link
24278 register, changing the stack offsets. In this case we can just
24279 return false, since the presence of far jumps in the function will
24280 not affect stack offsets.
24281
24282 If the arg pointer is live (or if it was live, but has now been
24283 eliminated and so set to dead) then we do have to test to see if
24284 the function might contain a far jump. This test can lead to some
24285 false negatives, since before reload is completed, the length of
24286 branch instructions is not known, so gcc defaults to returning their
24287 longest length, which in turn sets the far jump attribute to true.
24288
24289 A false negative will not result in bad code being generated, but it
24290 will result in a needless push and pop of the link register. We
24291 hope that this does not occur too often.
24292
24293 If we need doubleword stack alignment this could affect the other
24294 elimination offsets so we can't risk getting it wrong. */
24295 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24296 cfun->machine->arg_pointer_live = 1;
24297 else if (!cfun->machine->arg_pointer_live)
24298 return 0;
24299 }
24300
24301 /* We should not change far_jump_used during or after reload, as there is
24302 no chance to change stack frame layout. */
24303 if (reload_in_progress || reload_completed)
24304 return 0;
24305
24306 /* Check to see if the function contains a branch
24307 insn with the far jump attribute set. */
24308 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24309 {
24310 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24311 {
24312 far_jump = true;
24313 }
24314 func_size += get_attr_length (insn);
24315 }
24316
24317 /* The far_jump attribute is always true for thumb1 before the
24318 shorten_branch pass, so checking the far_jump attribute before
24319 shorten_branch is of little use.
24320
24321 The following heuristic tries to estimate more accurately whether a far
24322 jump may finally be used. The heuristic is very conservative, as there
24323 is no chance to roll back a decision not to use a far jump.
24324
24325 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24326 that each 2-byte insn is associated with a 4-byte constant pool entry.
24327 Using a function size of 2048/3 as the threshold is conservative enough. */
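/* Rough arithmetic behind the threshold: once the instruction bytes
   exceed about 2048 / 3 = 682, the instructions plus worst-case literal
   pools (4 bytes per 2-byte insn) could span more than the 2048-byte
   branch range, so we conservatively assume a far jump is needed.  */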
24328 if (far_jump)
24329 {
24330 if ((func_size * 3) >= 2048)
24331 {
24332 /* Record the fact that we have decided that
24333 the function does use far jumps. */
24334 cfun->machine->far_jump_used = 1;
24335 return 1;
24336 }
24337 }
24338
24339 return 0;
24340 }
24341
24342 /* Return nonzero if FUNC must be entered in ARM mode. */
24343 static bool
24344 is_called_in_ARM_mode (tree func)
24345 {
24346 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24347
24348 /* Ignore the problem about functions whose address is taken. */
24349 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24350 return true;
24351
24352 #ifdef ARM_PE
24353 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24354 #else
24355 return false;
24356 #endif
24357 }
24358
24359 /* Given the stack offsets and register mask in OFFSETS, decide how
24360 many additional registers to push instead of subtracting a constant
24361 from SP. For epilogues the principle is the same except we use pop.
24362 FOR_PROLOGUE indicates which we're generating. */
24363 static int
24364 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24365 {
24366 HOST_WIDE_INT amount;
24367 unsigned long live_regs_mask = offsets->saved_regs_mask;
24368 /* Extract a mask of the ones we can give to the Thumb's push/pop
24369 instruction. */
24370 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24371 /* Then count how many other high registers will need to be pushed. */
24372 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24373 int n_free, reg_base, size;
24374
24375 if (!for_prologue && frame_pointer_needed)
24376 amount = offsets->locals_base - offsets->saved_regs;
24377 else
24378 amount = offsets->outgoing_args - offsets->saved_regs;
24379
24380 /* If the stack frame size is 512 exactly, we can save one load
24381 instruction, which should make this a win even when optimizing
24382 for speed. */
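/* (Loosely speaking: the Thumb-1 SUB SP immediate typically reaches only
   508 bytes, so a 512-byte adjustment would otherwise need the constant
   loaded into a register first; pushing one extra register brings the
   remaining adjustment back within range.)  */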
24383 if (!optimize_size && amount != 512)
24384 return 0;
24385
24386 /* Can't do this if there are high registers to push. */
24387 if (high_regs_pushed != 0)
24388 return 0;
24389
24390 /* Shouldn't do it in the prologue if no registers would normally
24391 be pushed at all. In the epilogue, also allow it if we'll have
24392 a pop insn for the PC. */
24393 if (l_mask == 0
24394 && (for_prologue
24395 || TARGET_BACKTRACE
24396 || (live_regs_mask & 1 << LR_REGNUM) == 0
24397 || TARGET_INTERWORK
24398 || crtl->args.pretend_args_size != 0))
24399 return 0;
24400
24401 /* Don't do this if thumb_expand_prologue wants to emit instructions
24402 between the push and the stack frame allocation. */
24403 if (for_prologue
24404 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24405 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24406 return 0;
24407
24408 reg_base = 0;
24409 n_free = 0;
24410 if (!for_prologue)
24411 {
24412 size = arm_size_return_regs ();
24413 reg_base = ARM_NUM_INTS (size);
24414 live_regs_mask >>= reg_base;
24415 }
24416
24417 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24418 && (for_prologue || call_used_regs[reg_base + n_free]))
24419 {
24420 live_regs_mask >>= 1;
24421 n_free++;
24422 }
24423
24424 if (n_free == 0)
24425 return 0;
24426 gcc_assert (amount / 4 * 4 == amount);
24427
24428 if (amount >= 512 && (amount - n_free * 4) < 512)
24429 return (amount - 508) / 4;
24430 if (amount <= n_free * 4)
24431 return amount / 4;
24432 return 0;
24433 }
24434
24435 /* The bits which aren't usefully expanded as rtl. */
24436 const char *
24437 thumb1_unexpanded_epilogue (void)
24438 {
24439 arm_stack_offsets *offsets;
24440 int regno;
24441 unsigned long live_regs_mask = 0;
24442 int high_regs_pushed = 0;
24443 int extra_pop;
24444 int had_to_push_lr;
24445 int size;
24446
24447 if (cfun->machine->return_used_this_function != 0)
24448 return "";
24449
24450 if (IS_NAKED (arm_current_func_type ()))
24451 return "";
24452
24453 offsets = arm_get_frame_offsets ();
24454 live_regs_mask = offsets->saved_regs_mask;
24455 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24456
24457 /* We can deduce the registers used from the function's return value.
24458 This is more reliable than examining df_regs_ever_live_p () because that
24459 will be set if the register is ever used in the function, not just if
24460 the register is used to hold a return value. */
24461 size = arm_size_return_regs ();
24462
24463 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24464 if (extra_pop > 0)
24465 {
24466 unsigned long extra_mask = (1 << extra_pop) - 1;
24467 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24468 }
24469
24470 /* The prolog may have pushed some high registers to use as
24471 work registers. e.g. the testsuite file:
24472 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24473 compiles to produce:
24474 push {r4, r5, r6, r7, lr}
24475 mov r7, r9
24476 mov r6, r8
24477 push {r6, r7}
24478 as part of the prolog. We have to undo that pushing here. */
24479
24480 if (high_regs_pushed)
24481 {
24482 unsigned long mask = live_regs_mask & 0xff;
24483 int next_hi_reg;
24484
24485 /* The available low registers depend on the size of the value we are
24486 returning. */
24487 if (size <= 12)
24488 mask |= 1 << 3;
24489 if (size <= 8)
24490 mask |= 1 << 2;
24491
24492 if (mask == 0)
24493 /* Oh dear! We have no low registers into which we can pop
24494 high registers! */
24495 internal_error
24496 ("no low registers available for popping high registers");
24497
24498 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24499 if (live_regs_mask & (1 << next_hi_reg))
24500 break;
24501
24502 while (high_regs_pushed)
24503 {
24504 /* Find lo register(s) into which the high register(s) can
24505 be popped. */
24506 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24507 {
24508 if (mask & (1 << regno))
24509 high_regs_pushed--;
24510 if (high_regs_pushed == 0)
24511 break;
24512 }
24513
24514 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24515
24516 /* Pop the values into the low register(s). */
24517 thumb_pop (asm_out_file, mask);
24518
24519 /* Move the value(s) into the high registers. */
24520 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24521 {
24522 if (mask & (1 << regno))
24523 {
24524 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24525 regno);
24526
24527 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24528 if (live_regs_mask & (1 << next_hi_reg))
24529 break;
24530 }
24531 }
24532 }
24533 live_regs_mask &= ~0x0f00;
24534 }
24535
24536 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24537 live_regs_mask &= 0xff;
24538
24539 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24540 {
24541 /* Pop the return address into the PC. */
24542 if (had_to_push_lr)
24543 live_regs_mask |= 1 << PC_REGNUM;
24544
24545 /* Either no argument registers were pushed or a backtrace
24546 structure was created which includes an adjusted stack
24547 pointer, so just pop everything. */
24548 if (live_regs_mask)
24549 thumb_pop (asm_out_file, live_regs_mask);
24550
24551 /* We have either just popped the return address into the
24552 PC or it was kept in LR for the entire function.
24553 Note that thumb_pop has already called thumb_exit if the
24554 PC was in the list. */
24555 if (!had_to_push_lr)
24556 thumb_exit (asm_out_file, LR_REGNUM);
24557 }
24558 else
24559 {
24560 /* Pop everything but the return address. */
24561 if (live_regs_mask)
24562 thumb_pop (asm_out_file, live_regs_mask);
24563
24564 if (had_to_push_lr)
24565 {
24566 if (size > 12)
24567 {
24568 /* We have no free low regs, so save one. */
24569 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24570 LAST_ARG_REGNUM);
24571 }
24572
24573 /* Get the return address into a temporary register. */
24574 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24575
24576 if (size > 12)
24577 {
24578 /* Move the return address to lr. */
24579 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24580 LAST_ARG_REGNUM);
24581 /* Restore the low register. */
24582 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24583 IP_REGNUM);
24584 regno = LR_REGNUM;
24585 }
24586 else
24587 regno = LAST_ARG_REGNUM;
24588 }
24589 else
24590 regno = LR_REGNUM;
24591
24592 /* Remove the argument registers that were pushed onto the stack. */
24593 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24594 SP_REGNUM, SP_REGNUM,
24595 crtl->args.pretend_args_size);
24596
24597 thumb_exit (asm_out_file, regno);
24598 }
24599
24600 return "";
24601 }
24602
24603 /* Functions to save and restore machine-specific function data. */
24604 static struct machine_function *
24605 arm_init_machine_status (void)
24606 {
24607 struct machine_function *machine;
24608 machine = ggc_cleared_alloc<machine_function> ();
24609
24610 #if ARM_FT_UNKNOWN != 0
24611 machine->func_type = ARM_FT_UNKNOWN;
24612 #endif
24613 return machine;
24614 }
24615
24616 /* Return an RTX indicating where the return address to the
24617 calling function can be found. */
24618 rtx
24619 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24620 {
24621 if (count != 0)
24622 return NULL_RTX;
24623
24624 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24625 }
24626
24627 /* Do anything needed before RTL is emitted for each function. */
24628 void
24629 arm_init_expanders (void)
24630 {
24631 /* Arrange to initialize and mark the machine per-function status. */
24632 init_machine_status = arm_init_machine_status;
24633
24634 /* This is to stop the combine pass optimizing away the alignment
24635 adjustment of va_arg. */
24636 /* ??? It is claimed that this should not be necessary. */
24637 if (cfun)
24638 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24639 }
24640
24641 /* Check whether FUNC is compiled for a different mode (Thumb vs. ARM) than the current one. */
24642
24643 bool
24644 arm_change_mode_p (tree func)
24645 {
24646 if (TREE_CODE (func) != FUNCTION_DECL)
24647 return false;
24648
24649 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24650
24651 if (!callee_tree)
24652 callee_tree = target_option_default_node;
24653
24654 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24655 int flags = callee_opts->x_target_flags;
24656
24657 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24658 }
24659
24660 /* Like arm_compute_initial_elimination_offset. Simpler because there
24661 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24662 to point at the base of the local variables after static stack
24663 space for a function has been allocated. */
24664
24665 HOST_WIDE_INT
24666 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24667 {
24668 arm_stack_offsets *offsets;
24669
24670 offsets = arm_get_frame_offsets ();
24671
24672 switch (from)
24673 {
24674 case ARG_POINTER_REGNUM:
24675 switch (to)
24676 {
24677 case STACK_POINTER_REGNUM:
24678 return offsets->outgoing_args - offsets->saved_args;
24679
24680 case FRAME_POINTER_REGNUM:
24681 return offsets->soft_frame - offsets->saved_args;
24682
24683 case ARM_HARD_FRAME_POINTER_REGNUM:
24684 return offsets->saved_regs - offsets->saved_args;
24685
24686 case THUMB_HARD_FRAME_POINTER_REGNUM:
24687 return offsets->locals_base - offsets->saved_args;
24688
24689 default:
24690 gcc_unreachable ();
24691 }
24692 break;
24693
24694 case FRAME_POINTER_REGNUM:
24695 switch (to)
24696 {
24697 case STACK_POINTER_REGNUM:
24698 return offsets->outgoing_args - offsets->soft_frame;
24699
24700 case ARM_HARD_FRAME_POINTER_REGNUM:
24701 return offsets->saved_regs - offsets->soft_frame;
24702
24703 case THUMB_HARD_FRAME_POINTER_REGNUM:
24704 return offsets->locals_base - offsets->soft_frame;
24705
24706 default:
24707 gcc_unreachable ();
24708 }
24709 break;
24710
24711 default:
24712 gcc_unreachable ();
24713 }
24714 }
24715
24716 /* Generate the function's prologue. */
24717
24718 void
24719 thumb1_expand_prologue (void)
24720 {
24721 rtx_insn *insn;
24722
24723 HOST_WIDE_INT amount;
24724 HOST_WIDE_INT size;
24725 arm_stack_offsets *offsets;
24726 unsigned long func_type;
24727 int regno;
24728 unsigned long live_regs_mask;
24729 unsigned long l_mask;
24730 unsigned high_regs_pushed = 0;
24731 bool lr_needs_saving;
24732
24733 func_type = arm_current_func_type ();
24734
24735 /* Naked functions don't have prologues. */
24736 if (IS_NAKED (func_type))
24737 {
24738 if (flag_stack_usage_info)
24739 current_function_static_stack_size = 0;
24740 return;
24741 }
24742
24743 if (IS_INTERRUPT (func_type))
24744 {
24745 error ("interrupt Service Routines cannot be coded in Thumb mode");
24746 return;
24747 }
24748
24749 if (is_called_in_ARM_mode (current_function_decl))
24750 emit_insn (gen_prologue_thumb1_interwork ());
24751
24752 offsets = arm_get_frame_offsets ();
24753 live_regs_mask = offsets->saved_regs_mask;
24754 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24755
24756 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24757 l_mask = live_regs_mask & 0x40ff;
24758 /* Then count how many other high registers will need to be pushed. */
24759 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24760
24761 if (crtl->args.pretend_args_size)
24762 {
24763 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24764
24765 if (cfun->machine->uses_anonymous_args)
24766 {
24767 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24768 unsigned long mask;
24769
24770 mask = 1ul << (LAST_ARG_REGNUM + 1);
24771 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24772
24773 insn = thumb1_emit_multi_reg_push (mask, 0);
24774 }
24775 else
24776 {
24777 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24778 stack_pointer_rtx, x));
24779 }
24780 RTX_FRAME_RELATED_P (insn) = 1;
24781 }
24782
24783 if (TARGET_BACKTRACE)
24784 {
24785 HOST_WIDE_INT offset = 0;
24786 unsigned work_register;
24787 rtx work_reg, x, arm_hfp_rtx;
24788
24789 /* We have been asked to create a stack backtrace structure.
24790 The code looks like this:
24791
24792 0 .align 2
24793 0 func:
24794 0 sub SP, #16 Reserve space for 4 registers.
24795 2 push {R7} Push low registers.
24796 4 add R7, SP, #20 Get the stack pointer before the push.
24797 6 str R7, [SP, #8] Store the stack pointer
24798 (before reserving the space).
24799 8 mov R7, PC Get hold of the start of this code + 12.
24800 10 str R7, [SP, #16] Store it.
24801 12 mov R7, FP Get hold of the current frame pointer.
24802 14 str R7, [SP, #4] Store it.
24803 16 mov R7, LR Get hold of the current return address.
24804 18 str R7, [SP, #12] Store it.
24805 20 add R7, SP, #16 Point at the start of the
24806 backtrace structure.
24807 22 mov FP, R7 Put this value into the frame pointer. */
24808
24809 work_register = thumb_find_work_register (live_regs_mask);
24810 work_reg = gen_rtx_REG (SImode, work_register);
24811 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24812
24813 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24814 stack_pointer_rtx, GEN_INT (-16)));
24815 RTX_FRAME_RELATED_P (insn) = 1;
24816
24817 if (l_mask)
24818 {
24819 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24820 RTX_FRAME_RELATED_P (insn) = 1;
24821 lr_needs_saving = false;
24822
24823 offset = bit_count (l_mask) * UNITS_PER_WORD;
24824 }
24825
24826 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24827 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24828
24829 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24830 x = gen_frame_mem (SImode, x);
24831 emit_move_insn (x, work_reg);
24832
24833 /* Make sure that the instruction fetching the PC is in the right place
24834 to calculate "start of backtrace creation code + 12". */
24835 /* ??? The stores using the common WORK_REG ought to be enough to
24836 prevent the scheduler from doing anything weird. Failing that
24837 we could always move all of the following into an UNSPEC_VOLATILE. */
24838 if (l_mask)
24839 {
24840 x = gen_rtx_REG (SImode, PC_REGNUM);
24841 emit_move_insn (work_reg, x);
24842
24843 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24844 x = gen_frame_mem (SImode, x);
24845 emit_move_insn (x, work_reg);
24846
24847 emit_move_insn (work_reg, arm_hfp_rtx);
24848
24849 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24850 x = gen_frame_mem (SImode, x);
24851 emit_move_insn (x, work_reg);
24852 }
24853 else
24854 {
24855 emit_move_insn (work_reg, arm_hfp_rtx);
24856
24857 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24858 x = gen_frame_mem (SImode, x);
24859 emit_move_insn (x, work_reg);
24860
24861 x = gen_rtx_REG (SImode, PC_REGNUM);
24862 emit_move_insn (work_reg, x);
24863
24864 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24865 x = gen_frame_mem (SImode, x);
24866 emit_move_insn (x, work_reg);
24867 }
24868
24869 x = gen_rtx_REG (SImode, LR_REGNUM);
24870 emit_move_insn (work_reg, x);
24871
24872 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24873 x = gen_frame_mem (SImode, x);
24874 emit_move_insn (x, work_reg);
24875
24876 x = GEN_INT (offset + 12);
24877 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24878
24879 emit_move_insn (arm_hfp_rtx, work_reg);
24880 }
24881 /* Optimization: If we are not pushing any low registers but we are going
24882 to push some high registers then delay our first push. This will just
24883 be a push of LR and we can combine it with the push of the first high
24884 register. */
24885 else if ((l_mask & 0xff) != 0
24886 || (high_regs_pushed == 0 && lr_needs_saving))
24887 {
24888 unsigned long mask = l_mask;
24889 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24890 insn = thumb1_emit_multi_reg_push (mask, mask);
24891 RTX_FRAME_RELATED_P (insn) = 1;
24892 lr_needs_saving = false;
24893 }
24894
24895 if (high_regs_pushed)
24896 {
24897 unsigned pushable_regs;
24898 unsigned next_hi_reg;
24899 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24900 : crtl->args.info.nregs;
24901 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24902
24903 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24904 if (live_regs_mask & (1 << next_hi_reg))
24905 break;
24906
24907 /* Here we need to mask out registers used for passing arguments,
24908 even if they could be pushed. This avoids using them to stash the high
24909 registers, since such a stash could clobber the argument values. */
24910 pushable_regs = l_mask & (~arg_regs_mask);
24911 if (lr_needs_saving)
24912 pushable_regs &= ~(1 << LR_REGNUM);
24913
24914 if (pushable_regs == 0)
24915 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24916
24917 while (high_regs_pushed > 0)
24918 {
24919 unsigned long real_regs_mask = 0;
24920 unsigned long push_mask = 0;
24921
24922 for (regno = LR_REGNUM; regno >= 0; regno --)
24923 {
24924 if (pushable_regs & (1 << regno))
24925 {
24926 emit_move_insn (gen_rtx_REG (SImode, regno),
24927 gen_rtx_REG (SImode, next_hi_reg));
24928
24929 high_regs_pushed --;
24930 real_regs_mask |= (1 << next_hi_reg);
24931 push_mask |= (1 << regno);
24932
24933 if (high_regs_pushed)
24934 {
24935 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24936 next_hi_reg --)
24937 if (live_regs_mask & (1 << next_hi_reg))
24938 break;
24939 }
24940 else
24941 break;
24942 }
24943 }
24944
24945 /* If we had to find a work register and we have not yet
24946 saved the LR then add it to the list of regs to push. */
24947 if (lr_needs_saving)
24948 {
24949 push_mask |= 1 << LR_REGNUM;
24950 real_regs_mask |= 1 << LR_REGNUM;
24951 lr_needs_saving = false;
24952 }
24953
24954 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24955 RTX_FRAME_RELATED_P (insn) = 1;
24956 }
24957 }
24958
24959 /* Load the pic register before setting the frame pointer,
24960 so we can use r7 as a temporary work register. */
24961 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24962 arm_load_pic_register (live_regs_mask);
24963
24964 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24965 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24966 stack_pointer_rtx);
24967
24968 size = offsets->outgoing_args - offsets->saved_args;
24969 if (flag_stack_usage_info)
24970 current_function_static_stack_size = size;
24971
24972 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24973 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24974 sorry ("-fstack-check=specific for Thumb-1");
24975
24976 amount = offsets->outgoing_args - offsets->saved_regs;
24977 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24978 if (amount)
24979 {
24980 if (amount < 512)
24981 {
24982 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24983 GEN_INT (- amount)));
24984 RTX_FRAME_RELATED_P (insn) = 1;
24985 }
24986 else
24987 {
24988 rtx reg, dwarf;
24989
24990 /* The stack decrement is too big for an immediate value in a single
24991 insn. In theory we could issue multiple subtracts, but after
24992 three of them it becomes more space efficient to place the full
24993 value in the constant pool and load into a register. (Also the
24994 ARM debugger really likes to see only one stack decrement per
24995 function). So instead we look for a scratch register into which
24996 we can load the decrement, and then we subtract this from the
24997 stack pointer. Unfortunately on the thumb the only available
24998 scratch registers are the argument registers, and we cannot use
24999 these as they may hold arguments to the function. Instead we
25000 attempt to locate a call preserved register which is used by this
25001 function. If we can find one, then we know that it will have
25002 been pushed at the start of the prologue and so we can corrupt
25003 it now. */
25004 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25005 if (live_regs_mask & (1 << regno))
25006 break;
25007
25008 gcc_assert (regno <= LAST_LO_REGNUM);
25009
25010 reg = gen_rtx_REG (SImode, regno);
25011
25012 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25013
25014 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25015 stack_pointer_rtx, reg));
25016
25017 dwarf = gen_rtx_SET (stack_pointer_rtx,
25018 plus_constant (Pmode, stack_pointer_rtx,
25019 -amount));
25020 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25021 RTX_FRAME_RELATED_P (insn) = 1;
25022 }
25023 }
25024
25025 if (frame_pointer_needed)
25026 thumb_set_frame_pointer (offsets);
25027
25028 /* If we are profiling, make sure no instructions are scheduled before
25029 the call to mcount. Similarly if the user has requested no
25030 scheduling in the prolog. Similarly if we want non-call exceptions
25031 using the EABI unwinder, to prevent faulting instructions from being
25032 swapped with a stack adjustment. */
25033 if (crtl->profile || !TARGET_SCHED_PROLOG
25034 || (arm_except_unwind_info (&global_options) == UI_TARGET
25035 && cfun->can_throw_non_call_exceptions))
25036 emit_insn (gen_blockage ());
25037
25038 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25039 if (live_regs_mask & 0xff)
25040 cfun->machine->lr_save_eliminated = 0;
25041 }
25042
25043 /* Clear the caller-saved registers that are not used to pass return values,
25044 and any leaked condition flags, before exiting a cmse_nonsecure_entry function. */
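/* In practice (a summary rather than a specification): this clears r0-r3
   minus any registers carrying the return value, ip, any other registers
   the user has marked call-used, and, on hard-float targets, d0-d7, so
   that no secret values can leak to the non-secure caller.  */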
25045
25046 void
25047 cmse_nonsecure_entry_clear_before_return (void)
25048 {
25049 uint64_t to_clear_mask[2];
25050 uint32_t padding_bits_to_clear = 0;
25051 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25052 int regno, maxregno = IP_REGNUM;
25053 tree result_type;
25054 rtx result_rtl;
25055
25056 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25057 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25058
25059 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25060 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25061 to make sure the instructions used to clear them are present. */
25062 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25063 {
25064 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25065 maxregno = LAST_VFP_REGNUM;
25066
25067 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25068 to_clear_mask[0] |= float_mask;
25069
25070 float_mask = (1ULL << (maxregno - 63)) - 1;
25071 to_clear_mask[1] = float_mask;
25072
25073 /* Make sure we don't clear the two scratch registers used to clear the
25074 relevant FPSCR bits in output_return_instruction. */
25075 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25076 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25077 emit_use (gen_rtx_REG (SImode, 4));
25078 to_clear_mask[0] &= ~(1ULL << 4);
25079 }
25080
25081 /* If the user has defined registers to be caller saved, these are no longer
25082 restored by the function before returning and must thus be cleared for
25083 security purposes. */
25084 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25085 {
25086 /* We do not touch registers that can be used to pass arguments as per
25087 the AAPCS, since these should never be made callee-saved by user
25088 options. */
25089 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25090 continue;
25091 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25092 continue;
25093 if (call_used_regs[regno])
25094 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25095 }
25096
25097 /* Make sure we do not clear the registers the result is returned in. */
25098 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25099 if (!VOID_TYPE_P (result_type))
25100 {
25101 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25102
25103 /* No need to check that we return in registers, because we don't
25104 support returning on stack yet. */
25105 to_clear_mask[0]
25106 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25107 padding_bits_to_clear_ptr);
25108 }
25109
25110 if (padding_bits_to_clear != 0)
25111 {
25112 rtx reg_rtx;
25113 /* Padding bits to clear is not 0, so we know we are dealing with
25114 returning a composite type, which only uses r0. Let's make sure that
25115 r1-r3 are cleared too; we will use r1 as a scratch register. */
25116 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25117
25118 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25119
25120 /* Fill the lower half of the negated padding_bits_to_clear. */
25121 emit_move_insn (reg_rtx,
25122 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25123
25124 /* Also fill the top half of the negated padding_bits_to_clear. */
25125 if (((~padding_bits_to_clear) >> 16) > 0)
25126 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25127 GEN_INT (16),
25128 GEN_INT (16)),
25129 GEN_INT ((~padding_bits_to_clear) >> 16)));
25130
25131 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25132 gen_rtx_REG (SImode, R0_REGNUM),
25133 reg_rtx));
25134 }
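      /* Worked example (illustrative only, assuming a particular padding
	 layout): if the returned composite leaks one byte of padding in
	 bits 8-15 of r0, then padding_bits_to_clear == 0x0000ff00 and
	 ~padding_bits_to_clear == 0xffff00ff.  The first move loads the
	 low half 0x00ff into r1, the ZERO_EXTRACT fills the high half
	 with 0xffff, and the final AND computes r0 &= 0xffff00ff,
	 zeroing exactly the padding bits.  */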
25135
25136 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25137 {
25138 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25139 continue;
25140
25141 if (IS_VFP_REGNUM (regno))
25142 {
25143 /* If regno is an even vfp register and its successor is also to
25144 be cleared, use vmov. */
25145 if (TARGET_VFP_DOUBLE
25146 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25147 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25148 {
25149 emit_move_insn (gen_rtx_REG (DFmode, regno),
25150 CONST1_RTX (DFmode));
25151 emit_use (gen_rtx_REG (DFmode, regno));
25152 regno++;
25153 }
25154 else
25155 {
25156 emit_move_insn (gen_rtx_REG (SFmode, regno),
25157 CONST1_RTX (SFmode));
25158 emit_use (gen_rtx_REG (SFmode, regno));
25159 }
25160 }
25161 else
25162 {
25163 if (TARGET_THUMB1)
25164 {
25165 if (regno == R0_REGNUM)
25166 emit_move_insn (gen_rtx_REG (SImode, regno),
25167 const0_rtx);
25168 else
25169 /* R0 has either been cleared before (see the code above) or it
25170 holds a return value; either way it is not secret
25171 information. */
25172 emit_move_insn (gen_rtx_REG (SImode, regno),
25173 gen_rtx_REG (SImode, R0_REGNUM));
25174 emit_use (gen_rtx_REG (SImode, regno));
25175 }
25176 else
25177 {
25178 emit_move_insn (gen_rtx_REG (SImode, regno),
25179 gen_rtx_REG (SImode, LR_REGNUM));
25180 emit_use (gen_rtx_REG (SImode, regno));
25181 }
25182 }
25183 }
25184 }
25185
25186 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25187 POP instruction can be generated. LR should be replaced by PC. All
25188 the required checks are already done by USE_RETURN_INSN (). Hence,
25189 all we really need to check here is whether a single register or
25190 multiple registers are to be popped. */
25191 void
25192 thumb2_expand_return (bool simple_return)
25193 {
25194 int i, num_regs;
25195 unsigned long saved_regs_mask;
25196 arm_stack_offsets *offsets;
25197
25198 offsets = arm_get_frame_offsets ();
25199 saved_regs_mask = offsets->saved_regs_mask;
25200
25201 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25202 if (saved_regs_mask & (1 << i))
25203 num_regs++;
25204
25205 if (!simple_return && saved_regs_mask)
25206 {
25207 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25208 functions, or adapt the code to handle it according to the ACLE. This
25209 path should not be reachable for cmse_nonsecure_entry functions, but we
25210 assert it for now to ensure that future code changes do not silently
25211 change this behavior. */
25212 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25213 if (num_regs == 1)
25214 {
25215 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25216 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25217 rtx addr = gen_rtx_MEM (SImode,
25218 gen_rtx_POST_INC (SImode,
25219 stack_pointer_rtx));
25220 set_mem_alias_set (addr, get_frame_alias_set ());
25221 XVECEXP (par, 0, 0) = ret_rtx;
25222 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25223 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25224 emit_jump_insn (par);
25225 }
25226 else
25227 {
25228 saved_regs_mask &= ~ (1 << LR_REGNUM);
25229 saved_regs_mask |= (1 << PC_REGNUM);
25230 arm_emit_multi_reg_pop (saved_regs_mask);
25231 }
25232 }
25233 else
25234 {
25235 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25236 cmse_nonsecure_entry_clear_before_return ();
25237 emit_jump_insn (simple_return_rtx);
25238 }
25239 }
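/* Illustrative examples of the two return shapes above (register sets are
   assumptions, not taken from the original sources): with only LR saved,
   the PARALLEL of (return) and (set pc (mem (post_inc sp))) matches
   *pop_multiple_with_stack_update_and_return and is typically printed as
   "pop {pc}"; with several saved registers, e.g. {r4, r5, lr}, LR is
   replaced by PC in the mask and arm_emit_multi_reg_pop produces a
   "pop {r4, r5, pc}"-style return.  */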
25240
25241 void
25242 thumb1_expand_epilogue (void)
25243 {
25244 HOST_WIDE_INT amount;
25245 arm_stack_offsets *offsets;
25246 int regno;
25247
25248 /* Naked functions don't have epilogues. */
25249 if (IS_NAKED (arm_current_func_type ()))
25250 return;
25251
25252 offsets = arm_get_frame_offsets ();
25253 amount = offsets->outgoing_args - offsets->saved_regs;
25254
25255 if (frame_pointer_needed)
25256 {
25257 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25258 amount = offsets->locals_base - offsets->saved_regs;
25259 }
25260 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25261
25262 gcc_assert (amount >= 0);
25263 if (amount)
25264 {
25265 emit_insn (gen_blockage ());
25266
25267 if (amount < 512)
25268 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25269 GEN_INT (amount)));
25270 else
25271 {
25272 /* r3 is always free in the epilogue. */
25273 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25274
25275 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25276 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25277 }
25278 }
25279
25280 /* Emit a USE (stack_pointer_rtx), so that
25281 the stack adjustment will not be deleted. */
25282 emit_insn (gen_force_register_use (stack_pointer_rtx));
25283
25284 if (crtl->profile || !TARGET_SCHED_PROLOG)
25285 emit_insn (gen_blockage ());
25286
25287 /* Emit a clobber for each insn that will be restored in the epilogue,
25288 so that flow2 will get register lifetimes correct. */
25289 for (regno = 0; regno < 13; regno++)
25290 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25291 emit_clobber (gen_rtx_REG (SImode, regno));
25292
25293 if (! df_regs_ever_live_p (LR_REGNUM))
25294 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25295
25296 /* Clear all caller-saved regs that are not used to return. */
25297 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25298 cmse_nonsecure_entry_clear_before_return ();
25299 }
25300
25301 /* Epilogue code for APCS frame. */
25302 static void
25303 arm_expand_epilogue_apcs_frame (bool really_return)
25304 {
25305 unsigned long func_type;
25306 unsigned long saved_regs_mask;
25307 int num_regs = 0;
25308 int i;
25309 int floats_from_frame = 0;
25310 arm_stack_offsets *offsets;
25311
25312 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25313 func_type = arm_current_func_type ();
25314
25315 /* Get frame offsets for ARM. */
25316 offsets = arm_get_frame_offsets ();
25317 saved_regs_mask = offsets->saved_regs_mask;
25318
25319 /* Find the offset of the floating-point save area in the frame. */
25320 floats_from_frame
25321 = (offsets->saved_args
25322 + arm_compute_static_chain_stack_bytes ()
25323 - offsets->frame);
25324
25325 /* Compute how many core registers are saved and how far away the floats are. */
25326 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25327 if (saved_regs_mask & (1 << i))
25328 {
25329 num_regs++;
25330 floats_from_frame += 4;
25331 }
25332
25333 if (TARGET_HARD_FLOAT)
25334 {
25335 int start_reg;
25336 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25337
25338 /* The offset is from IP_REGNUM. */
25339 int saved_size = arm_get_vfp_saved_size ();
25340 if (saved_size > 0)
25341 {
25342 rtx_insn *insn;
25343 floats_from_frame += saved_size;
25344 insn = emit_insn (gen_addsi3 (ip_rtx,
25345 hard_frame_pointer_rtx,
25346 GEN_INT (-floats_from_frame)));
25347 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25348 ip_rtx, hard_frame_pointer_rtx);
25349 }
25350
25351 /* Generate VFP register multi-pop. */
25352 start_reg = FIRST_VFP_REGNUM;
25353
25354 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25355 /* Look for a case where a reg does not need restoring. */
25356 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25357 && (!df_regs_ever_live_p (i + 1)
25358 || call_used_regs[i + 1]))
25359 {
25360 if (start_reg != i)
25361 arm_emit_vfp_multi_reg_pop (start_reg,
25362 (i - start_reg) / 2,
25363 gen_rtx_REG (SImode,
25364 IP_REGNUM));
25365 start_reg = i + 2;
25366 }
25367
25368 /* Restore the remaining regs that we have discovered (or possibly
25369 even all of them, if the conditional in the for loop never
25370 fired). */
25371 if (start_reg != i)
25372 arm_emit_vfp_multi_reg_pop (start_reg,
25373 (i - start_reg) / 2,
25374 gen_rtx_REG (SImode, IP_REGNUM));
25375 }
25376
25377 if (TARGET_IWMMXT)
25378 {
25379 /* The frame pointer is guaranteed to be non-double-word aligned, as
25380 it is set to double-word-aligned old_stack_pointer - 4. */
25381 rtx_insn *insn;
25382 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25383
25384 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25385 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25386 {
25387 rtx addr = gen_frame_mem (V2SImode,
25388 plus_constant (Pmode, hard_frame_pointer_rtx,
25389 - lrm_count * 4));
25390 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25391 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25392 gen_rtx_REG (V2SImode, i),
25393 NULL_RTX);
25394 lrm_count += 2;
25395 }
25396 }
25397
25398 /* saved_regs_mask should contain IP, which holds the old stack pointer
25399 from the time the activation record was created. Since SP and IP are
25400 adjacent registers, we can restore the value directly into SP. */
25401 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25402 saved_regs_mask &= ~(1 << IP_REGNUM);
25403 saved_regs_mask |= (1 << SP_REGNUM);
25404
25405 /* There are two registers left in saved_regs_mask - LR and PC. We
25406 only need to restore LR (the return address), but to
25407 save time we can load it directly into PC, unless we need a
25408 special function exit sequence, or we are not really returning. */
25409 if (really_return
25410 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25411 && !crtl->calls_eh_return)
25412 /* Delete LR from the register mask, so that LR on
25413 the stack is loaded into the PC in the register mask. */
25414 saved_regs_mask &= ~(1 << LR_REGNUM);
25415 else
25416 saved_regs_mask &= ~(1 << PC_REGNUM);
25417
25418 num_regs = bit_count (saved_regs_mask);
25419 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25420 {
25421 rtx_insn *insn;
25422 emit_insn (gen_blockage ());
25423 /* Unwind the stack to just below the saved registers. */
25424 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25425 hard_frame_pointer_rtx,
25426 GEN_INT (- 4 * num_regs)));
25427
25428 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25429 stack_pointer_rtx, hard_frame_pointer_rtx);
25430 }
25431
25432 arm_emit_multi_reg_pop (saved_regs_mask);
25433
25434 if (IS_INTERRUPT (func_type))
25435 {
25436 /* Interrupt handlers will have pushed the
25437 IP onto the stack, so restore it now. */
25438 rtx_insn *insn;
25439 rtx addr = gen_rtx_MEM (SImode,
25440 gen_rtx_POST_INC (SImode,
25441 stack_pointer_rtx));
25442 set_mem_alias_set (addr, get_frame_alias_set ());
25443 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25444 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25445 gen_rtx_REG (SImode, IP_REGNUM),
25446 NULL_RTX);
25447 }
25448
25449 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25450 return;
25451
25452 if (crtl->calls_eh_return)
25453 emit_insn (gen_addsi3 (stack_pointer_rtx,
25454 stack_pointer_rtx,
25455 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25456
25457 if (IS_STACKALIGN (func_type))
25458 /* Restore the original stack pointer. Before prologue, the stack was
25459 realigned and the original stack pointer saved in r0. For details,
25460 see comment in arm_expand_prologue. */
25461 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25462
25463 emit_jump_insn (simple_return_rtx);
25464 }
25465
25466 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
25467 function is not a sibcall. */
25468 void
25469 arm_expand_epilogue (bool really_return)
25470 {
25471 unsigned long func_type;
25472 unsigned long saved_regs_mask;
25473 int num_regs = 0;
25474 int i;
25475 int amount;
25476 arm_stack_offsets *offsets;
25477
25478 func_type = arm_current_func_type ();
25479
25480 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25481 and let output_return_instruction take care of any instruction emission. */
25482 if (IS_NAKED (func_type)
25483 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25484 {
25485 if (really_return)
25486 emit_jump_insn (simple_return_rtx);
25487 return;
25488 }
25489
25490 /* If we are throwing an exception, then we really must be doing a
25491 return, so we can't tail-call. */
25492 gcc_assert (!crtl->calls_eh_return || really_return);
25493
25494 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25495 {
25496 arm_expand_epilogue_apcs_frame (really_return);
25497 return;
25498 }
25499
25500 /* Get frame offsets for ARM. */
25501 offsets = arm_get_frame_offsets ();
25502 saved_regs_mask = offsets->saved_regs_mask;
25503 num_regs = bit_count (saved_regs_mask);
25504
25505 if (frame_pointer_needed)
25506 {
25507 rtx_insn *insn;
25508 /* Restore stack pointer if necessary. */
25509 if (TARGET_ARM)
25510 {
25511 /* In ARM mode, the frame pointer points to the first saved register.
25512 Restore the stack pointer to the last saved register. */
25513 amount = offsets->frame - offsets->saved_regs;
25514
25515 /* Force out any pending memory operations that reference stacked data
25516 before stack de-allocation occurs. */
25517 emit_insn (gen_blockage ());
25518 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25519 hard_frame_pointer_rtx,
25520 GEN_INT (amount)));
25521 arm_add_cfa_adjust_cfa_note (insn, amount,
25522 stack_pointer_rtx,
25523 hard_frame_pointer_rtx);
25524
25525 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25526 deleted. */
25527 emit_insn (gen_force_register_use (stack_pointer_rtx));
25528 }
25529 else
25530 {
25531 /* In Thumb-2 mode, the frame pointer points to the last saved
25532 register. */
25533 amount = offsets->locals_base - offsets->saved_regs;
25534 if (amount)
25535 {
25536 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25537 hard_frame_pointer_rtx,
25538 GEN_INT (amount)));
25539 arm_add_cfa_adjust_cfa_note (insn, amount,
25540 hard_frame_pointer_rtx,
25541 hard_frame_pointer_rtx);
25542 }
25543
25544 /* Force out any pending memory operations that reference stacked data
25545 before stack de-allocation occurs. */
25546 emit_insn (gen_blockage ());
25547 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25548 hard_frame_pointer_rtx));
25549 arm_add_cfa_adjust_cfa_note (insn, 0,
25550 stack_pointer_rtx,
25551 hard_frame_pointer_rtx);
25552 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25553 deleted. */
25554 emit_insn (gen_force_register_use (stack_pointer_rtx));
25555 }
25556 }
25557 else
25558 {
25559 /* Pop off outgoing args and local frame to adjust stack pointer to
25560 last saved register. */
25561 amount = offsets->outgoing_args - offsets->saved_regs;
25562 if (amount)
25563 {
25564 rtx_insn *tmp;
25565 /* Force out any pending memory operations that reference stacked data
25566 before stack de-allocation occurs. */
25567 emit_insn (gen_blockage ());
25568 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25569 stack_pointer_rtx,
25570 GEN_INT (amount)));
25571 arm_add_cfa_adjust_cfa_note (tmp, amount,
25572 stack_pointer_rtx, stack_pointer_rtx);
25573 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25574 not deleted. */
25575 emit_insn (gen_force_register_use (stack_pointer_rtx));
25576 }
25577 }
25578
25579 if (TARGET_HARD_FLOAT)
25580 {
25581 /* Generate VFP register multi-pop. */
25582 int end_reg = LAST_VFP_REGNUM + 1;
25583
25584 /* Scan the registers in reverse order. We need to match
25585 any groupings made in the prologue and generate matching
25586 vldm operations. The need to match groups is because,
25587 unlike pop, vldm can only do consecutive regs. */
25588 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25589 /* Look for a case where a reg does not need restoring. */
25590 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25591 && (!df_regs_ever_live_p (i + 1)
25592 || call_used_regs[i + 1]))
25593 {
25594 /* Restore the regs discovered so far (from reg+2 to
25595 end_reg). */
25596 if (end_reg > i + 2)
25597 arm_emit_vfp_multi_reg_pop (i + 2,
25598 (end_reg - (i + 2)) / 2,
25599 stack_pointer_rtx);
25600 end_reg = i;
25601 }
25602
25603 /* Restore the remaining regs that we have discovered (or possibly
25604 even all of them, if the conditional in the for loop never
25605 fired). */
25606 if (end_reg > i + 2)
25607 arm_emit_vfp_multi_reg_pop (i + 2,
25608 (end_reg - (i + 2)) / 2,
25609 stack_pointer_rtx);
25610 }
25611
25612 if (TARGET_IWMMXT)
25613 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25614 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25615 {
25616 rtx_insn *insn;
25617 rtx addr = gen_rtx_MEM (V2SImode,
25618 gen_rtx_POST_INC (SImode,
25619 stack_pointer_rtx));
25620 set_mem_alias_set (addr, get_frame_alias_set ());
25621 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25622 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25623 gen_rtx_REG (V2SImode, i),
25624 NULL_RTX);
25625 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25626 stack_pointer_rtx, stack_pointer_rtx);
25627 }
25628
25629 if (saved_regs_mask)
25630 {
25631 rtx insn;
25632 bool return_in_pc = false;
25633
25634 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25635 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25636 && !IS_CMSE_ENTRY (func_type)
25637 && !IS_STACKALIGN (func_type)
25638 && really_return
25639 && crtl->args.pretend_args_size == 0
25640 && saved_regs_mask & (1 << LR_REGNUM)
25641 && !crtl->calls_eh_return)
25642 {
25643 saved_regs_mask &= ~(1 << LR_REGNUM);
25644 saved_regs_mask |= (1 << PC_REGNUM);
25645 return_in_pc = true;
25646 }
25647
25648 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25649 {
25650 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25651 if (saved_regs_mask & (1 << i))
25652 {
25653 rtx addr = gen_rtx_MEM (SImode,
25654 gen_rtx_POST_INC (SImode,
25655 stack_pointer_rtx));
25656 set_mem_alias_set (addr, get_frame_alias_set ());
25657
25658 if (i == PC_REGNUM)
25659 {
25660 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25661 XVECEXP (insn, 0, 0) = ret_rtx;
25662 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25663 addr);
25664 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25665 insn = emit_jump_insn (insn);
25666 }
25667 else
25668 {
25669 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25670 addr));
25671 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25672 gen_rtx_REG (SImode, i),
25673 NULL_RTX);
25674 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25675 stack_pointer_rtx,
25676 stack_pointer_rtx);
25677 }
25678 }
25679 }
25680 else
25681 {
25682 if (TARGET_LDRD
25683 && current_tune->prefer_ldrd_strd
25684 && !optimize_function_for_size_p (cfun))
25685 {
25686 if (TARGET_THUMB2)
25687 thumb2_emit_ldrd_pop (saved_regs_mask);
25688 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25689 arm_emit_ldrd_pop (saved_regs_mask);
25690 else
25691 arm_emit_multi_reg_pop (saved_regs_mask);
25692 }
25693 else
25694 arm_emit_multi_reg_pop (saved_regs_mask);
25695 }
25696
25697 if (return_in_pc)
25698 return;
25699 }
25700
25701 amount
25702 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25703 if (amount)
25704 {
25705 int i, j;
25706 rtx dwarf = NULL_RTX;
25707 rtx_insn *tmp =
25708 emit_insn (gen_addsi3 (stack_pointer_rtx,
25709 stack_pointer_rtx,
25710 GEN_INT (amount)));
25711
25712 RTX_FRAME_RELATED_P (tmp) = 1;
25713
25714 if (cfun->machine->uses_anonymous_args)
25715 {
25716 /* Restore pretend args. See arm_expand_prologue for how pretend_args
25717 are saved on the stack. */
25718 int num_regs = crtl->args.pretend_args_size / 4;
25719 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25720 for (j = 0, i = 0; j < num_regs; i++)
25721 if (saved_regs_mask & (1 << i))
25722 {
25723 rtx reg = gen_rtx_REG (SImode, i);
25724 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25725 j++;
25726 }
25727 REG_NOTES (tmp) = dwarf;
25728 }
25729 arm_add_cfa_adjust_cfa_note (tmp, amount,
25730 stack_pointer_rtx, stack_pointer_rtx);
25731 }
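/* Worked example for the anonymous-args case above (the size is an
   assumption for illustration): if crtl->args.pretend_args_size is 8 then
   num_regs is 2 and saved_regs_mask = (0xf0 >> 2) & 0xf = 0xc, i.e. r2 and
   r3, so REG_CFA_RESTORE notes are added for exactly the two argument
   registers the prologue pushed as pretend args.  */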
25732
25733 /* Clear all caller-saved regs that are not used to return. */
25734 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25735 {
25736 /* CMSE_ENTRY always returns. */
25737 gcc_assert (really_return);
25738 cmse_nonsecure_entry_clear_before_return ();
25739 }
25740
25741 if (!really_return)
25742 return;
25743
25744 if (crtl->calls_eh_return)
25745 emit_insn (gen_addsi3 (stack_pointer_rtx,
25746 stack_pointer_rtx,
25747 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25748
25749 if (IS_STACKALIGN (func_type))
25750 /* Restore the original stack pointer. Before prologue, the stack was
25751 realigned and the original stack pointer saved in r0. For details,
25752 see comment in arm_expand_prologue. */
25753 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25754
25755 emit_jump_insn (simple_return_rtx);
25756 }
25757
25758 /* Implementation of insn prologue_thumb1_interwork. This is the first
25759 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25760
25761 const char *
25762 thumb1_output_interwork (void)
25763 {
25764 const char * name;
25765 FILE *f = asm_out_file;
25766
25767 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25768 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25769 == SYMBOL_REF);
25770 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25771
25772 /* Generate code sequence to switch us into Thumb mode. */
25773 /* The .code 32 directive has already been emitted by
25774 ASM_DECLARE_FUNCTION_NAME. */
25775 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25776 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25777
25778 /* Generate a label, so that the debugger will notice the
25779 change in instruction sets. This label is also used by
25780 the assembler to bypass the ARM code when this function
25781 is called from a Thumb encoded function elsewhere in the
25782 same file. Hence the definition of STUB_NAME here must
25783 agree with the definition in gas/config/tc-arm.c. */
25784
25785 #define STUB_NAME ".real_start_of"
25786
25787 fprintf (f, "\t.code\t16\n");
25788 #ifdef ARM_PE
25789 if (arm_dllexport_name_p (name))
25790 name = arm_strip_name_encoding (name);
25791 #endif
25792 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25793 fprintf (f, "\t.thumb_func\n");
25794 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25795
25796 return "";
25797 }
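/* Sketch of the assembly emitted above for a function NAME on a bare ELF
   target (empty user label prefix assumed; NAME stands in for the real
   symbol):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_ofNAME
	.thumb_func
   .real_start_ofNAME:
*/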
25798
25799 /* Handle the case of a double word load into a low register from
25800 a computed memory address. The computed address may involve a
25801 register which is overwritten by the load. */
25802 const char *
25803 thumb_load_double_from_address (rtx *operands)
25804 {
25805 rtx addr;
25806 rtx base;
25807 rtx offset;
25808 rtx arg1;
25809 rtx arg2;
25810
25811 gcc_assert (REG_P (operands[0]));
25812 gcc_assert (MEM_P (operands[1]));
25813
25814 /* Get the memory address. */
25815 addr = XEXP (operands[1], 0);
25816
25817 /* Work out how the memory address is computed. */
25818 switch (GET_CODE (addr))
25819 {
25820 case REG:
25821 operands[2] = adjust_address (operands[1], SImode, 4);
25822
25823 if (REGNO (operands[0]) == REGNO (addr))
25824 {
25825 output_asm_insn ("ldr\t%H0, %2", operands);
25826 output_asm_insn ("ldr\t%0, %1", operands);
25827 }
25828 else
25829 {
25830 output_asm_insn ("ldr\t%0, %1", operands);
25831 output_asm_insn ("ldr\t%H0, %2", operands);
25832 }
25833 break;
25834
25835 case CONST:
25836 /* Compute <address> + 4 for the high order load. */
25837 operands[2] = adjust_address (operands[1], SImode, 4);
25838
25839 output_asm_insn ("ldr\t%0, %1", operands);
25840 output_asm_insn ("ldr\t%H0, %2", operands);
25841 break;
25842
25843 case PLUS:
25844 arg1 = XEXP (addr, 0);
25845 arg2 = XEXP (addr, 1);
25846
25847 if (CONSTANT_P (arg1))
25848 base = arg2, offset = arg1;
25849 else
25850 base = arg1, offset = arg2;
25851
25852 gcc_assert (REG_P (base));
25853
25854 /* Catch the case of <address> = <reg> + <reg> */
25855 if (REG_P (offset))
25856 {
25857 int reg_offset = REGNO (offset);
25858 int reg_base = REGNO (base);
25859 int reg_dest = REGNO (operands[0]);
25860
25861 /* Add the base and offset registers together into the
25862 higher destination register. */
25863 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25864 reg_dest + 1, reg_base, reg_offset);
25865
25866 /* Load the lower destination register from the address in
25867 the higher destination register. */
25868 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25869 reg_dest, reg_dest + 1);
25870
25871 /* Load the higher destination register from its own address
25872 plus 4. */
25873 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25874 reg_dest + 1, reg_dest + 1);
25875 }
25876 else
25877 {
25878 /* Compute <address> + 4 for the high order load. */
25879 operands[2] = adjust_address (operands[1], SImode, 4);
25880
25881 /* If the computed address is held in the low order register
25882 then load the high order register first, otherwise always
25883 load the low order register first. */
25884 if (REGNO (operands[0]) == REGNO (base))
25885 {
25886 output_asm_insn ("ldr\t%H0, %2", operands);
25887 output_asm_insn ("ldr\t%0, %1", operands);
25888 }
25889 else
25890 {
25891 output_asm_insn ("ldr\t%0, %1", operands);
25892 output_asm_insn ("ldr\t%H0, %2", operands);
25893 }
25894 }
25895 break;
25896
25897 case LABEL_REF:
25898 /* With no registers to worry about we can just load the value
25899 directly. */
25900 operands[2] = adjust_address (operands[1], SImode, 4);
25901
25902 output_asm_insn ("ldr\t%H0, %2", operands);
25903 output_asm_insn ("ldr\t%0, %1", operands);
25904 break;
25905
25906 default:
25907 gcc_unreachable ();
25908 }
25909
25910 return "";
25911 }
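/* Illustrative output for the <reg> + <reg> case above, assuming the
   destination pair is r0/r1, the base is r2 and the offset is r3:

	add	r1, r2, r3
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   The high half of the destination is used as the scratch address so the
   low half can be loaded without clobbering the base register.  */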
25912
25913 const char *
25914 thumb_output_move_mem_multiple (int n, rtx *operands)
25915 {
25916 switch (n)
25917 {
25918 case 2:
25919 if (REGNO (operands[4]) > REGNO (operands[5]))
25920 std::swap (operands[4], operands[5]);
25921
25922 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25923 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25924 break;
25925
25926 case 3:
25927 if (REGNO (operands[4]) > REGNO (operands[5]))
25928 std::swap (operands[4], operands[5]);
25929 if (REGNO (operands[5]) > REGNO (operands[6]))
25930 std::swap (operands[5], operands[6]);
25931 if (REGNO (operands[4]) > REGNO (operands[5]))
25932 std::swap (operands[4], operands[5]);
25933
25934 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25935 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25936 break;
25937
25938 default:
25939 gcc_unreachable ();
25940 }
25941
25942 return "";
25943 }
25944
25945 /* Output a call-via instruction for thumb state. */
25946 const char *
25947 thumb_call_via_reg (rtx reg)
25948 {
25949 int regno = REGNO (reg);
25950 rtx *labelp;
25951
25952 gcc_assert (regno < LR_REGNUM);
25953
25954 /* If we are in the normal text section we can use a single instance
25955 per compilation unit. If we are doing function sections, then we need
25956 an entry per section, since we can't rely on reachability. */
25957 if (in_section == text_section)
25958 {
25959 thumb_call_reg_needed = 1;
25960
25961 if (thumb_call_via_label[regno] == NULL)
25962 thumb_call_via_label[regno] = gen_label_rtx ();
25963 labelp = thumb_call_via_label + regno;
25964 }
25965 else
25966 {
25967 if (cfun->machine->call_via[regno] == NULL)
25968 cfun->machine->call_via[regno] = gen_label_rtx ();
25969 labelp = cfun->machine->call_via + regno;
25970 }
25971
25972 output_asm_insn ("bl\t%a0", labelp);
25973 return "";
25974 }
25975
25976 /* Routines for generating rtl. */
25977 void
25978 thumb_expand_movmemqi (rtx *operands)
25979 {
25980 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25981 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25982 HOST_WIDE_INT len = INTVAL (operands[2]);
25983 HOST_WIDE_INT offset = 0;
25984
25985 while (len >= 12)
25986 {
25987 emit_insn (gen_movmem12b (out, in, out, in));
25988 len -= 12;
25989 }
25990
25991 if (len >= 8)
25992 {
25993 emit_insn (gen_movmem8b (out, in, out, in));
25994 len -= 8;
25995 }
25996
25997 if (len >= 4)
25998 {
25999 rtx reg = gen_reg_rtx (SImode);
26000 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26001 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26002 len -= 4;
26003 offset += 4;
26004 }
26005
26006 if (len >= 2)
26007 {
26008 rtx reg = gen_reg_rtx (HImode);
26009 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26010 plus_constant (Pmode, in,
26011 offset))));
26012 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26013 offset)),
26014 reg));
26015 len -= 2;
26016 offset += 2;
26017 }
26018
26019 if (len)
26020 {
26021 rtx reg = gen_reg_rtx (QImode);
26022 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26023 plus_constant (Pmode, in,
26024 offset))));
26025 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26026 offset)),
26027 reg));
26028 }
26029 }
26030
26031 void
26032 thumb_reload_out_hi (rtx *operands)
26033 {
26034 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26035 }
26036
26037 /* Return the length of a function name prefix
26038 that starts with the character 'c'. */
26039 static int
26040 arm_get_strip_length (int c)
26041 {
26042 switch (c)
26043 {
26044 ARM_NAME_ENCODING_LENGTHS
26045 default: return 0;
26046 }
26047 }
26048
26049 /* Return a pointer to a function's name with any
26050 and all prefix encodings stripped from it. */
26051 const char *
26052 arm_strip_name_encoding (const char *name)
26053 {
26054 int skip;
26055
26056 while ((skip = arm_get_strip_length (* name)))
26057 name += skip;
26058
26059 return name;
26060 }
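/* Usage sketch (assuming '*' is one of the prefixes listed in
   ARM_NAME_ENCODING_LENGTHS): arm_strip_name_encoding ("*foo") returns
   "foo", while a name with no encoded prefix is returned unchanged.  */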
26061
26062 /* If there is a '*' anywhere in the name's prefix, then
26063 emit the stripped name verbatim, otherwise prepend an
26064 underscore if leading underscores are being used. */
26065 void
26066 arm_asm_output_labelref (FILE *stream, const char *name)
26067 {
26068 int skip;
26069 int verbatim = 0;
26070
26071 while ((skip = arm_get_strip_length (* name)))
26072 {
26073 verbatim |= (*name == '*');
26074 name += skip;
26075 }
26076
26077 if (verbatim)
26078 fputs (name, stream);
26079 else
26080 asm_fprintf (stream, "%U%s", name);
26081 }
26082
26083 /* This function is used to emit an EABI tag and its associated value.
26084 We emit the numerical value of the tag in case the assembler does not
26085 support textual tags (e.g. gas prior to 2.20). If requested, we include
26086 the tag name in a comment so that anyone reading the assembler output
26087 will know which tag is being set.
26088
26089 This function is not static because arm-c.c needs it too. */
26090
26091 void
26092 arm_emit_eabi_attribute (const char *name, int num, int val)
26093 {
26094 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26095 if (flag_verbose_asm || flag_debug_asm)
26096 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26097 asm_fprintf (asm_out_file, "\n");
26098 }
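/* Example of the output produced above (assuming ASM_COMMENT_START is "@"):
   arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2) with
   -fverbose-asm prints

	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals
*/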
26099
26100 /* This function is used to print CPU tuning information as a comment
26101 in the assembler file. Pointers are not printed for now. */
26102
26103 void
26104 arm_print_tune_info (void)
26105 {
26106 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26107 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26108 current_tune->constant_limit);
26109 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26110 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26111 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26112 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26113 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26114 "prefetch.l1_cache_size:\t%d\n",
26115 current_tune->prefetch.l1_cache_size);
26116 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26117 "prefetch.l1_cache_line_size:\t%d\n",
26118 current_tune->prefetch.l1_cache_line_size);
26119 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26120 "prefer_constant_pool:\t%d\n",
26121 (int) current_tune->prefer_constant_pool);
26122 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26123 "branch_cost:\t(s:speed, p:predictable)\n");
26124 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26125 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26126 current_tune->branch_cost (false, false));
26127 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26128 current_tune->branch_cost (false, true));
26129 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26130 current_tune->branch_cost (true, false));
26131 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26132 current_tune->branch_cost (true, true));
26133 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26134 "prefer_ldrd_strd:\t%d\n",
26135 (int) current_tune->prefer_ldrd_strd);
26136 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26137 "logical_op_non_short_circuit:\t[%d,%d]\n",
26138 (int) current_tune->logical_op_non_short_circuit_thumb,
26139 (int) current_tune->logical_op_non_short_circuit_arm);
26140 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26141 "prefer_neon_for_64bits:\t%d\n",
26142 (int) current_tune->prefer_neon_for_64bits);
26143 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26144 "disparage_flag_setting_t16_encodings:\t%d\n",
26145 (int) current_tune->disparage_flag_setting_t16_encodings);
26146 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26147 "string_ops_prefer_neon:\t%d\n",
26148 (int) current_tune->string_ops_prefer_neon);
26149 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26150 "max_insns_inline_memset:\t%d\n",
26151 current_tune->max_insns_inline_memset);
26152 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26153 current_tune->fusible_ops);
26154 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26155 (int) current_tune->sched_autopref);
26156 }
26157
26158 /* Print .arch and .arch_extension directives corresponding to the
26159 current architecture configuration. */
26160 static void
26161 arm_print_asm_arch_directives ()
26162 {
26163 const arch_option *arch
26164 = arm_parse_arch_option_name (all_architectures, "-march",
26165 arm_active_target.arch_name);
26166 auto_sbitmap opt_bits (isa_num_bits);
26167
26168 gcc_assert (arch);
26169
26170 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26171 if (!arch->common.extensions)
26172 return;
26173
26174 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26175 opt->name != NULL;
26176 opt++)
26177 {
26178 if (!opt->remove)
26179 {
26180 arm_initialize_isa (opt_bits, opt->isa_bits);
26181
26182 /* If every feature bit of this option is set in the target
26183 ISA specification, print out the option name. However,
26184 don't print anything if all the bits are part of the
26185 FPU specification. */
26186 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26187 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26188 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26189 }
26190 }
26191 }
26192
26193 static void
26194 arm_file_start (void)
26195 {
26196 int val;
26197
26198 if (TARGET_BPABI)
26199 {
26200 /* We don't have a specified CPU. Use the architecture to
26201 generate the tags.
26202
26203 Note: it might be better to do this unconditionally, then the
26204 assembler would not need to know about all new CPU names as
26205 they are added. */
26206 if (!arm_active_target.core_name)
26207 {
26208 /* armv7ve doesn't support any extensions. */
26209 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26210 {
26211 /* Keep backward compatibility for assemblers
26212 which don't support armv7ve. */
26213 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26214 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26215 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26216 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26217 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26218 }
26219 else
26220 arm_print_asm_arch_directives ();
26221 }
26222 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26223 asm_fprintf (asm_out_file, "\t.arch %s\n",
26224 arm_active_target.core_name + 8);
26225 else
26226 {
26227 const char* truncated_name
26228 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26229 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26230 }
26231
26232 if (print_tune_info)
26233 arm_print_tune_info ();
26234
26235 if (! TARGET_SOFT_FLOAT)
26236 {
26237 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26238 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26239
26240 if (TARGET_HARD_FLOAT_ABI)
26241 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26242 }
26243
26244 /* Some of these attributes only apply when the corresponding features
26245 are used. However we don't have any easy way of figuring this out.
26246 Conservatively record the setting that would have been used. */
26247
26248 if (flag_rounding_math)
26249 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26250
26251 if (!flag_unsafe_math_optimizations)
26252 {
26253 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26254 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26255 }
26256 if (flag_signaling_nans)
26257 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26258
26259 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26260 flag_finite_math_only ? 1 : 3);
26261
26262 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26263 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26264 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26265 flag_short_enums ? 1 : 2);
26266
26267 /* Tag_ABI_optimization_goals. */
26268 if (optimize_size)
26269 val = 4;
26270 else if (optimize >= 2)
26271 val = 2;
26272 else if (optimize)
26273 val = 1;
26274 else
26275 val = 6;
26276 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26277
26278 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26279 unaligned_access);
26280
26281 if (arm_fp16_format)
26282 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26283 (int) arm_fp16_format);
26284
26285 if (arm_lang_output_object_attributes_hook)
26286 arm_lang_output_object_attributes_hook();
26287 }
26288
26289 default_file_start ();
26290 }
26291
26292 static void
26293 arm_file_end (void)
26294 {
26295 int regno;
26296
26297 if (NEED_INDICATE_EXEC_STACK)
26298 /* Add .note.GNU-stack. */
26299 file_end_indicate_exec_stack ();
26300
26301 if (! thumb_call_reg_needed)
26302 return;
26303
26304 switch_to_section (text_section);
26305 asm_fprintf (asm_out_file, "\t.code 16\n");
26306 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26307
26308 for (regno = 0; regno < LR_REGNUM; regno++)
26309 {
26310 rtx label = thumb_call_via_label[regno];
26311
26312 if (label != 0)
26313 {
26314 targetm.asm_out.internal_label (asm_out_file, "L",
26315 CODE_LABEL_NUMBER (label));
26316 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26317 }
26318 }
26319 }
26320
26321 #ifndef ARM_PE
26322 /* Symbols in the text segment can be accessed without indirecting via the
26323 constant pool; it may take an extra binary operation, but this is still
26324 faster than indirecting via memory. Don't do this when not optimizing,
26325 since we won't be calculating al of the offsets necessary to do this
26326 simplification. */
26327
26328 static void
26329 arm_encode_section_info (tree decl, rtx rtl, int first)
26330 {
26331 if (optimize > 0 && TREE_CONSTANT (decl))
26332 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26333
26334 default_encode_section_info (decl, rtl, first);
26335 }
26336 #endif /* !ARM_PE */
26337
26338 static void
26339 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26340 {
26341 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26342 && !strcmp (prefix, "L"))
26343 {
26344 arm_ccfsm_state = 0;
26345 arm_target_insn = NULL;
26346 }
26347 default_internal_label (stream, prefix, labelno);
26348 }
26349
26350 /* Output code to add DELTA to the first argument, and then jump
26351 to FUNCTION. Used for C++ multiple inheritance. */
26352
26353 static void
26354 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26355 HOST_WIDE_INT, tree function)
26356 {
26357 static int thunk_label = 0;
26358 char label[256];
26359 char labelpc[256];
26360 int mi_delta = delta;
26361 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26362 int shift = 0;
26363 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26364 ? 1 : 0);
26365 if (mi_delta < 0)
26366 mi_delta = - mi_delta;
26367
26368 final_start_function (emit_barrier (), file, 1);
26369
26370 if (TARGET_THUMB1)
26371 {
26372 int labelno = thunk_label++;
26373 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26374 /* Thunks are entered in arm mode when available. */
26375 if (TARGET_THUMB1_ONLY)
26376 {
26377 /* push r3 so we can use it as a temporary. */
26378 /* TODO: Omit this save if r3 is not used. */
26379 fputs ("\tpush {r3}\n", file);
26380 fputs ("\tldr\tr3, ", file);
26381 }
26382 else
26383 {
26384 fputs ("\tldr\tr12, ", file);
26385 }
26386 assemble_name (file, label);
26387 fputc ('\n', file);
26388 if (flag_pic)
26389 {
26390 /* If we are generating PIC, the ldr instruction below loads
26391 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26392 the address of the add + 8, so we have:
26393
26394 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26395 = target + 1.
26396
26397 Note that we have "+ 1" because some versions of GNU ld
26398 don't set the low bit of the result for R_ARM_REL32
26399 relocations against thumb function symbols.
26400 On ARMv6M this is +4, not +8. */
26401 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26402 assemble_name (file, labelpc);
26403 fputs (":\n", file);
26404 if (TARGET_THUMB1_ONLY)
26405 {
26406 /* This is 2 insns after the start of the thunk, so we know it
26407 is 4-byte aligned. */
26408 fputs ("\tadd\tr3, pc, r3\n", file);
26409 fputs ("\tmov r12, r3\n", file);
26410 }
26411 else
26412 fputs ("\tadd\tr12, pc, r12\n", file);
26413 }
26414 else if (TARGET_THUMB1_ONLY)
26415 fputs ("\tmov r12, r3\n", file);
26416 }
26417 if (TARGET_THUMB1_ONLY)
26418 {
26419 if (mi_delta > 255)
26420 {
26421 fputs ("\tldr\tr3, ", file);
26422 assemble_name (file, label);
26423 fputs ("+4\n", file);
26424 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26425 mi_op, this_regno, this_regno);
26426 }
26427 else if (mi_delta != 0)
26428 {
26429 /* Thumb1 unified syntax requires s suffix in instruction name when
26430 one of the operands is immediate. */
26431 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26432 mi_op, this_regno, this_regno,
26433 mi_delta);
26434 }
26435 }
26436 else
26437 {
26438 /* TODO: Use movw/movt for large constants when available. */
26439 while (mi_delta != 0)
26440 {
26441 if ((mi_delta & (3 << shift)) == 0)
26442 shift += 2;
26443 else
26444 {
26445 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26446 mi_op, this_regno, this_regno,
26447 mi_delta & (0xff << shift));
26448 mi_delta &= ~(0xff << shift);
26449 shift += 8;
26450 }
26451 }
26452 }
26453 if (TARGET_THUMB1)
26454 {
26455 if (TARGET_THUMB1_ONLY)
26456 fputs ("\tpop\t{r3}\n", file);
26457
26458 fprintf (file, "\tbx\tr12\n");
26459 ASM_OUTPUT_ALIGN (file, 2);
26460 assemble_name (file, label);
26461 fputs (":\n", file);
26462 if (flag_pic)
26463 {
26464 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26465 rtx tem = XEXP (DECL_RTL (function), 0);
26466 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26467 pipeline offset is four rather than eight. Adjust the offset
26468 accordingly. */
26469 tem = plus_constant (GET_MODE (tem), tem,
26470 TARGET_THUMB1_ONLY ? -3 : -7);
26471 tem = gen_rtx_MINUS (GET_MODE (tem),
26472 tem,
26473 gen_rtx_SYMBOL_REF (Pmode,
26474 ggc_strdup (labelpc)));
26475 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26476 }
26477 else
26478 /* Output ".word .LTHUNKn". */
26479 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26480
26481 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26482 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26483 }
26484 else
26485 {
26486 fputs ("\tb\t", file);
26487 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26488 if (NEED_PLT_RELOC)
26489 fputs ("(PLT)", file);
26490 fputc ('\n', file);
26491 }
26492
26493 final_end_function ();
26494 }
26495
26496 /* MI thunk handling for TARGET_32BIT. */
26497
26498 static void
26499 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26500 HOST_WIDE_INT vcall_offset, tree function)
26501 {
26502 /* On ARM, this_regno is R0 or R1 depending on
26503 whether the function returns an aggregate or not.
26504 */
26505 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26506 function)
26507 ? R1_REGNUM : R0_REGNUM);
26508
26509 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26510 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26511 reload_completed = 1;
26512 emit_note (NOTE_INSN_PROLOGUE_END);
26513
26514 /* Add DELTA to THIS_RTX. */
26515 if (delta != 0)
26516 arm_split_constant (PLUS, Pmode, NULL_RTX,
26517 delta, this_rtx, this_rtx, false);
26518
26519 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26520 if (vcall_offset != 0)
26521 {
26522 /* Load *THIS_RTX. */
26523 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26524 /* Compute *THIS_RTX + VCALL_OFFSET. */
26525 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26526 false);
26527 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26528 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26529 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26530 }
26531
26532 /* Generate a tail call to the target function. */
26533 if (!TREE_USED (function))
26534 {
26535 assemble_external (function);
26536 TREE_USED (function) = 1;
26537 }
26538 rtx funexp = XEXP (DECL_RTL (function), 0);
26539 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26540 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26541 SIBLING_CALL_P (insn) = 1;
26542
26543 insn = get_insns ();
26544 shorten_branches (insn);
26545 final_start_function (insn, file, 1);
26546 final (insn, file, 1);
26547 final_end_function ();
26548
26549 /* Stop pretending this is a post-reload pass. */
26550 reload_completed = 0;
26551 }
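/* Illustrative thunk body generated by the code above for DELTA == 4,
   VCALL_OFFSET == 0 and a non-aggregate return (this pointer in r0):

	add	r0, r0, #4
	b	<function>

   With a non-zero VCALL_OFFSET, the adjustment found at
   *(*this + VCALL_OFFSET) is loaded through ip and added to r0 before
   the tail call.  */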
26552
26553 /* Output code to add DELTA to the first argument, and then jump
26554 to FUNCTION. Used for C++ multiple inheritance. */
26555
26556 static void
26557 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26558 HOST_WIDE_INT vcall_offset, tree function)
26559 {
26560 if (TARGET_32BIT)
26561 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26562 else
26563 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26564 }
26565
26566 int
26567 arm_emit_vector_const (FILE *file, rtx x)
26568 {
26569 int i;
26570 const char * pattern;
26571
26572 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26573
26574 switch (GET_MODE (x))
26575 {
26576 case E_V2SImode: pattern = "%08x"; break;
26577 case E_V4HImode: pattern = "%04x"; break;
26578 case E_V8QImode: pattern = "%02x"; break;
26579 default: gcc_unreachable ();
26580 }
26581
26582 fprintf (file, "0x");
26583 for (i = CONST_VECTOR_NUNITS (x); i--;)
26584 {
26585 rtx element;
26586
26587 element = CONST_VECTOR_ELT (x, i);
26588 fprintf (file, pattern, INTVAL (element));
26589 }
26590
26591 return 1;
26592 }
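/* Worked example: a V4HImode CONST_VECTOR with elements {1, 2, 3, 4}
   (element 0 listed first) is printed highest element first with the
   "%04x" pattern, giving "0x0004000300020001".  */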
26593
26594 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26595 HFmode constant pool entries are actually loaded with ldr. */
26596 void
26597 arm_emit_fp16_const (rtx c)
26598 {
26599 long bits;
26600
26601 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26602 if (WORDS_BIG_ENDIAN)
26603 assemble_zeros (2);
26604 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26605 if (!WORDS_BIG_ENDIAN)
26606 assemble_zeros (2);
26607 }
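/* Worked example: the HFmode constant 1.0 has the IEEE half-precision
   encoding 0x3c00, so on a little-endian target the code above emits a
   2-byte 0x3c00 followed by 2 bytes of zero padding, making the pool
   entry a full word that ldr can load.  */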
26608
26609 const char *
26610 arm_output_load_gr (rtx *operands)
26611 {
26612 rtx reg;
26613 rtx offset;
26614 rtx wcgr;
26615 rtx sum;
26616
26617 if (!MEM_P (operands [1])
26618 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26619 || !REG_P (reg = XEXP (sum, 0))
26620 || !CONST_INT_P (offset = XEXP (sum, 1))
26621 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26622 return "wldrw%?\t%0, %1";
26623
26624 /* Fix up an out-of-range load of a GR register. */
26625 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26626 wcgr = operands[0];
26627 operands[0] = reg;
26628 output_asm_insn ("ldr%?\t%0, %1", operands);
26629
26630 operands[0] = wcgr;
26631 operands[1] = reg;
26632 output_asm_insn ("tmcr%?\t%0, %1", operands);
26633 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26634
26635 return "";
26636 }
26637
26638 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26639
26640 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26641 named arg and all anonymous args onto the stack.
26642 XXX I know the prologue shouldn't be pushing registers, but it is faster
26643 that way. */
26644
26645 static void
26646 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26647 machine_mode mode,
26648 tree type,
26649 int *pretend_size,
26650 int second_time ATTRIBUTE_UNUSED)
26651 {
26652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26653 int nregs;
26654
26655 cfun->machine->uses_anonymous_args = 1;
26656 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26657 {
26658 nregs = pcum->aapcs_ncrn;
26659 if (nregs & 1)
26660 {
26661 int res = arm_needs_doubleword_align (mode, type);
26662 if (res < 0 && warn_psabi)
26663 inform (input_location, "parameter passing for argument of "
26664 "type %qT changed in GCC 7.1", type);
26665 else if (res > 0)
26666 nregs++;
26667 }
26668 }
26669 else
26670 nregs = pcum->nregs;
26671
26672 if (nregs < NUM_ARG_REGS)
26673 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26674 }
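/* Worked example for the AAPCS case above (the prototype is an assumption
   for illustration): for "int f (int a, ...)" the named argument uses r0,
   so nregs is 1 and *pretend_size becomes (4 - 1) * 4 = 12, telling the
   prologue to push r1-r3 so that the anonymous arguments form a contiguous
   block with any stack arguments.  */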
26675
26676 /* We can't rely on the caller doing the proper promotion when
26677 using APCS or ATPCS. */
26678
26679 static bool
26680 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26681 {
26682 return !TARGET_AAPCS_BASED;
26683 }
26684
26685 static machine_mode
26686 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26687 machine_mode mode,
26688 int *punsignedp ATTRIBUTE_UNUSED,
26689 const_tree fntype ATTRIBUTE_UNUSED,
26690 int for_return ATTRIBUTE_UNUSED)
26691 {
26692 if (GET_MODE_CLASS (mode) == MODE_INT
26693 && GET_MODE_SIZE (mode) < 4)
26694 return SImode;
26695
26696 return mode;
26697 }
26698
26699
26700 static bool
26701 arm_default_short_enums (void)
26702 {
26703 return ARM_DEFAULT_SHORT_ENUMS;
26704 }
26705
26706
26707 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26708
26709 static bool
26710 arm_align_anon_bitfield (void)
26711 {
26712 return TARGET_AAPCS_BASED;
26713 }
26714
26715
26716 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26717
26718 static tree
26719 arm_cxx_guard_type (void)
26720 {
26721 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26722 }
26723
26724
26725 /* The EABI says test the least significant bit of a guard variable. */
26726
26727 static bool
26728 arm_cxx_guard_mask_bit (void)
26729 {
26730 return TARGET_AAPCS_BASED;
26731 }
26732
26733
26734 /* The EABI specifies that all array cookies are 8 bytes long. */
26735
26736 static tree
26737 arm_get_cookie_size (tree type)
26738 {
26739 tree size;
26740
26741 if (!TARGET_AAPCS_BASED)
26742 return default_cxx_get_cookie_size (type);
26743
26744 size = build_int_cst (sizetype, 8);
26745 return size;
26746 }
26747
26748
26749 /* The EABI says that array cookies should also contain the element size. */
26750
26751 static bool
26752 arm_cookie_has_size (void)
26753 {
26754 return TARGET_AAPCS_BASED;
26755 }
26756
26757
26758 /* The EABI says constructors and destructors should return a pointer to
26759 the object constructed/destroyed. */
26760
26761 static bool
26762 arm_cxx_cdtor_returns_this (void)
26763 {
26764 return TARGET_AAPCS_BASED;
26765 }
26766
26767 /* The EABI says that an inline function may never be the key
26768 method. */
26769
26770 static bool
26771 arm_cxx_key_method_may_be_inline (void)
26772 {
26773 return !TARGET_AAPCS_BASED;
26774 }
26775
26776 static void
26777 arm_cxx_determine_class_data_visibility (tree decl)
26778 {
26779 if (!TARGET_AAPCS_BASED
26780 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26781 return;
26782
26783 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26784 is exported. However, on systems without dynamic vague linkage,
26785 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26786 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26787 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26788 else
26789 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26790 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26791 }
26792
26793 static bool
26794 arm_cxx_class_data_always_comdat (void)
26795 {
26796 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26797 vague linkage if the class has no key function. */
26798 return !TARGET_AAPCS_BASED;
26799 }
26800
26801
26802 /* The EABI says __aeabi_atexit should be used to register static
26803 destructors. */
26804
26805 static bool
26806 arm_cxx_use_aeabi_atexit (void)
26807 {
26808 return TARGET_AAPCS_BASED;
26809 }
26810
26811
26812 void
26813 arm_set_return_address (rtx source, rtx scratch)
26814 {
26815 arm_stack_offsets *offsets;
26816 HOST_WIDE_INT delta;
26817 rtx addr;
26818 unsigned long saved_regs;
26819
26820 offsets = arm_get_frame_offsets ();
26821 saved_regs = offsets->saved_regs_mask;
26822
26823 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26824 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26825 else
26826 {
26827 if (frame_pointer_needed)
26828 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26829 else
26830 {
26831 /* LR will be the first saved register. */
26832 delta = offsets->outgoing_args - (offsets->frame + 4);
26833
26834
26835 if (delta >= 4096)
26836 {
26837 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26838 GEN_INT (delta & ~4095)));
26839 addr = scratch;
26840 delta &= 4095;
26841 }
26842 else
26843 addr = stack_pointer_rtx;
26844
26845 addr = plus_constant (Pmode, addr, delta);
26846 }
26847 /* The store needs to be marked as frame related in order to prevent
26848 DSE from deleting it as dead if it is based on fp. */
26849 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26850 RTX_FRAME_RELATED_P (insn) = 1;
26851 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26852 }
26853 }
26854
26855
26856 void
26857 thumb_set_return_address (rtx source, rtx scratch)
26858 {
26859 arm_stack_offsets *offsets;
26860 HOST_WIDE_INT delta;
26861 HOST_WIDE_INT limit;
26862 int reg;
26863 rtx addr;
26864 unsigned long mask;
26865
26866 emit_use (source);
26867
26868 offsets = arm_get_frame_offsets ();
26869 mask = offsets->saved_regs_mask;
26870 if (mask & (1 << LR_REGNUM))
26871 {
26872 limit = 1024;
26873 /* Find the saved regs. */
26874 if (frame_pointer_needed)
26875 {
26876 delta = offsets->soft_frame - offsets->saved_args;
26877 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26878 if (TARGET_THUMB1)
26879 limit = 128;
26880 }
26881 else
26882 {
26883 delta = offsets->outgoing_args - offsets->saved_args;
26884 reg = SP_REGNUM;
26885 }
26886 /* Allow for the stack frame. */
26887 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26888 delta -= 16;
26889 /* The link register is always the first saved register. */
26890 delta -= 4;
26891
26892 /* Construct the address. */
26893 addr = gen_rtx_REG (SImode, reg);
26894 if (delta > limit)
26895 {
26896 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26897 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26898 addr = scratch;
26899 }
26900 else
26901 addr = plus_constant (Pmode, addr, delta);
26902
26903 /* The store needs to be marked as frame related in order to prevent
26904 DSE from deleting it as dead if it is based on fp. */
26905 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26906 RTX_FRAME_RELATED_P (insn) = 1;
26907 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26908 }
26909 else
26910 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26911 }
26912
26913 /* Implements target hook vector_mode_supported_p. */
26914 bool
26915 arm_vector_mode_supported_p (machine_mode mode)
26916 {
26917 /* Neon also supports V2SImode, etc. listed in the clause below. */
26918 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26919 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26920 || mode == V2DImode || mode == V8HFmode))
26921 return true;
26922
26923 if ((TARGET_NEON || TARGET_IWMMXT)
26924 && ((mode == V2SImode)
26925 || (mode == V4HImode)
26926 || (mode == V8QImode)))
26927 return true;
26928
26929 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26930 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26931 || mode == V2HAmode))
26932 return true;
26933
26934 return false;
26935 }
26936
26937 /* Implements target hook array_mode_supported_p. */
26938
26939 static bool
26940 arm_array_mode_supported_p (machine_mode mode,
26941 unsigned HOST_WIDE_INT nelems)
26942 {
26943 if (TARGET_NEON
26944 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26945 && (nelems >= 2 && nelems <= 4))
26946 return true;
26947
26948 return false;
26949 }
26950
26951 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26952 registers when autovectorizing for Neon, at least until multiple vector
26953 widths are supported properly by the middle-end. */
26954
26955 static machine_mode
26956 arm_preferred_simd_mode (scalar_mode mode)
26957 {
26958 if (TARGET_NEON)
26959 switch (mode)
26960 {
26961 case E_SFmode:
26962 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26963 case E_SImode:
26964 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26965 case E_HImode:
26966 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26967 case E_QImode:
26968 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26969 case E_DImode:
26970 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26971 return V2DImode;
26972 break;
26973
26974 default:;
26975 }
26976
26977 if (TARGET_REALLY_IWMMXT)
26978 switch (mode)
26979 {
26980 case E_SImode:
26981 return V2SImode;
26982 case E_HImode:
26983 return V4HImode;
26984 case E_QImode:
26985 return V8QImode;
26986
26987 default:;
26988 }
26989
26990 return word_mode;
26991 }
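/* Illustrative sketch (added for clarity, not part of the original source):
   with Neon enabled, the defaults above select quad-word vectors, e.g. SFmode
   autovectorizes as V4SFmode and QImode as V16QImode, while
   -mvectorize-with-neon-double picks the D-register forms V2SFmode and
   V8QImode instead.  */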
26992
26993 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26994
26995 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26996    using r0-r4 for function arguments, r7 for the stack frame and not having
26997    enough left over to do doubleword arithmetic.  For Thumb-2 all the
26998 potentially problematic instructions accept high registers so this is not
26999 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27000 that require many low registers. */
27001 static bool
27002 arm_class_likely_spilled_p (reg_class_t rclass)
27003 {
27004 if ((TARGET_THUMB1 && rclass == LO_REGS)
27005 || rclass == CC_REG)
27006 return true;
27007
27008 return false;
27009 }
27010
27011 /* Implements target hook small_register_classes_for_mode_p. */
27012 bool
27013 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27014 {
27015 return TARGET_THUMB1;
27016 }
27017
27018 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27019 ARM insns and therefore guarantee that the shift count is modulo 256.
27020 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27021 guarantee no particular behavior for out-of-range counts. */
27022
27023 static unsigned HOST_WIDE_INT
27024 arm_shift_truncation_mask (machine_mode mode)
27025 {
27026 return mode == SImode ? 255 : 0;
27027 }
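/* Worked example (added for illustration): for SImode the mask of 255 means a
   variable shift count of 257 behaves like a shift by 1, since the count is
   truncated modulo 256; the mask of 0 for other modes promises no truncation
   at all.  */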
27028
27029
27030 /* Map internal gcc register numbers to DWARF2 register numbers. */
27031
27032 unsigned int
27033 arm_dbx_register_number (unsigned int regno)
27034 {
27035 if (regno < 16)
27036 return regno;
27037
27038 if (IS_VFP_REGNUM (regno))
27039 {
27040 /* See comment in arm_dwarf_register_span. */
27041 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27042 return 64 + regno - FIRST_VFP_REGNUM;
27043 else
27044 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27045 }
27046
27047 if (IS_IWMMXT_GR_REGNUM (regno))
27048 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27049
27050 if (IS_IWMMXT_REGNUM (regno))
27051 return 112 + regno - FIRST_IWMMXT_REGNUM;
27052
27053 return DWARF_FRAME_REGISTERS;
27054 }
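/* Example mapping (illustrative, assuming the usual register numbering): core
   registers r0-r15 keep their own numbers; s0 (the first VFP register) maps to
   DWARF register 64 under the legacy encoding, while d16-d31, which have no
   single-precision aliases, map to 272-287 in the 256-based D-register
   range.  */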
27055
27056 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27057    GCC models them as 64 32-bit registers, so we need to describe this to
27058 the DWARF generation code. Other registers can use the default. */
27059 static rtx
27060 arm_dwarf_register_span (rtx rtl)
27061 {
27062 machine_mode mode;
27063 unsigned regno;
27064 rtx parts[16];
27065 int nregs;
27066 int i;
27067
27068 regno = REGNO (rtl);
27069 if (!IS_VFP_REGNUM (regno))
27070 return NULL_RTX;
27071
27072 /* XXX FIXME: The EABI defines two VFP register ranges:
27073 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27074 256-287: D0-D31
27075 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27076 corresponding D register. Until GDB supports this, we shall use the
27077 legacy encodings. We also use these encodings for D0-D15 for
27078 compatibility with older debuggers. */
27079 mode = GET_MODE (rtl);
27080 if (GET_MODE_SIZE (mode) < 8)
27081 return NULL_RTX;
27082
27083 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27084 {
27085 nregs = GET_MODE_SIZE (mode) / 4;
27086 for (i = 0; i < nregs; i += 2)
27087 if (TARGET_BIG_END)
27088 {
27089 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27090 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27091 }
27092 else
27093 {
27094 parts[i] = gen_rtx_REG (SImode, regno + i);
27095 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27096 }
27097 }
27098 else
27099 {
27100 nregs = GET_MODE_SIZE (mode) / 8;
27101 for (i = 0; i < nregs; i++)
27102 parts[i] = gen_rtx_REG (DImode, regno + i);
27103 }
27104
27105 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27106 }
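/* Illustrative example (not from the original source): on a little-endian
   target a DFmode value held in the first D register is described as
   (parallel [(reg:SI s0) (reg:SI s1)]), i.e. as the two SImode halves that
   GCC uses to model the D register; big-endian targets swap the two
   halves.  */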
27107
27108 #if ARM_UNWIND_INFO
27109 /* Emit unwind directives for a store-multiple instruction or stack pointer
27110 push during alignment.
27111 These should only ever be generated by the function prologue code, so
27112 expect them to have a particular form.
27113    The store-multiple instruction sometimes pushes pc as the last register,
27114    although it should not be tracked in the unwind information; for -Os it
27115    sometimes pushes some dummy registers before the first register that needs
27116    to be tracked in the unwind information.  Such dummy registers are there
27117    just to avoid a separate stack adjustment, and will not be restored in the
27118    epilogue.  */
27119
27120 static void
27121 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27122 {
27123 int i;
27124 HOST_WIDE_INT offset;
27125 HOST_WIDE_INT nregs;
27126 int reg_size;
27127 unsigned reg;
27128 unsigned lastreg;
27129 unsigned padfirst = 0, padlast = 0;
27130 rtx e;
27131
27132 e = XVECEXP (p, 0, 0);
27133 gcc_assert (GET_CODE (e) == SET);
27134
27135 /* First insn will adjust the stack pointer. */
27136 gcc_assert (GET_CODE (e) == SET
27137 && REG_P (SET_DEST (e))
27138 && REGNO (SET_DEST (e)) == SP_REGNUM
27139 && GET_CODE (SET_SRC (e)) == PLUS);
27140
27141 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27142 nregs = XVECLEN (p, 0) - 1;
27143 gcc_assert (nregs);
27144
27145 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27146 if (reg < 16)
27147 {
27148 /* For -Os dummy registers can be pushed at the beginning to
27149 avoid separate stack pointer adjustment. */
27150 e = XVECEXP (p, 0, 1);
27151 e = XEXP (SET_DEST (e), 0);
27152 if (GET_CODE (e) == PLUS)
27153 padfirst = INTVAL (XEXP (e, 1));
27154 gcc_assert (padfirst == 0 || optimize_size);
27155 /* The function prologue may also push pc, but not annotate it as it is
27156 never restored. We turn this into a stack pointer adjustment. */
27157 e = XVECEXP (p, 0, nregs);
27158 e = XEXP (SET_DEST (e), 0);
27159 if (GET_CODE (e) == PLUS)
27160 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27161 else
27162 padlast = offset - 4;
27163 gcc_assert (padlast == 0 || padlast == 4);
27164 if (padlast == 4)
27165 fprintf (asm_out_file, "\t.pad #4\n");
27166 reg_size = 4;
27167 fprintf (asm_out_file, "\t.save {");
27168 }
27169 else if (IS_VFP_REGNUM (reg))
27170 {
27171 reg_size = 8;
27172 fprintf (asm_out_file, "\t.vsave {");
27173 }
27174 else
27175 /* Unknown register type. */
27176 gcc_unreachable ();
27177
27178 /* If the stack increment doesn't match the size of the saved registers,
27179 something has gone horribly wrong. */
27180 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27181
27182 offset = padfirst;
27183 lastreg = 0;
27184 /* The remaining insns will describe the stores. */
27185 for (i = 1; i <= nregs; i++)
27186 {
27187 /* Expect (set (mem <addr>) (reg)).
27188 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27189 e = XVECEXP (p, 0, i);
27190 gcc_assert (GET_CODE (e) == SET
27191 && MEM_P (SET_DEST (e))
27192 && REG_P (SET_SRC (e)));
27193
27194 reg = REGNO (SET_SRC (e));
27195 gcc_assert (reg >= lastreg);
27196
27197 if (i != 1)
27198 fprintf (asm_out_file, ", ");
27199 /* We can't use %r for vfp because we need to use the
27200 double precision register names. */
27201 if (IS_VFP_REGNUM (reg))
27202 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27203 else
27204 asm_fprintf (asm_out_file, "%r", reg);
27205
27206 if (flag_checking)
27207 {
27208 /* Check that the addresses are consecutive. */
27209 e = XEXP (SET_DEST (e), 0);
27210 if (GET_CODE (e) == PLUS)
27211 gcc_assert (REG_P (XEXP (e, 0))
27212 && REGNO (XEXP (e, 0)) == SP_REGNUM
27213 && CONST_INT_P (XEXP (e, 1))
27214 && offset == INTVAL (XEXP (e, 1)));
27215 else
27216 gcc_assert (i == 1
27217 && REG_P (e)
27218 && REGNO (e) == SP_REGNUM);
27219 offset += reg_size;
27220 }
27221 }
27222 fprintf (asm_out_file, "}\n");
27223 if (padfirst)
27224 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27225 }
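/* Example of the directives produced (illustrative only): a prologue
       push {r4, r5, lr}
   results in
       .save {r4, r5, lr}
   while a VFP store-multiple of d8/d9 results in
       .vsave {d8, d9}
   with additional .pad directives when dummy registers or pc are pushed as
   padding.  */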
27226
27227 /* Emit unwind directives for a SET. */
27228
27229 static void
27230 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27231 {
27232 rtx e0;
27233 rtx e1;
27234 unsigned reg;
27235
27236 e0 = XEXP (p, 0);
27237 e1 = XEXP (p, 1);
27238 switch (GET_CODE (e0))
27239 {
27240 case MEM:
27241 /* Pushing a single register. */
27242 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27243 || !REG_P (XEXP (XEXP (e0, 0), 0))
27244 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27245 abort ();
27246
27247 asm_fprintf (asm_out_file, "\t.save ");
27248 if (IS_VFP_REGNUM (REGNO (e1)))
27249 asm_fprintf(asm_out_file, "{d%d}\n",
27250 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27251 else
27252 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27253 break;
27254
27255 case REG:
27256 if (REGNO (e0) == SP_REGNUM)
27257 {
27258 /* A stack increment. */
27259 if (GET_CODE (e1) != PLUS
27260 || !REG_P (XEXP (e1, 0))
27261 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27262 || !CONST_INT_P (XEXP (e1, 1)))
27263 abort ();
27264
27265 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27266 -INTVAL (XEXP (e1, 1)));
27267 }
27268 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27269 {
27270 HOST_WIDE_INT offset;
27271
27272 if (GET_CODE (e1) == PLUS)
27273 {
27274 if (!REG_P (XEXP (e1, 0))
27275 || !CONST_INT_P (XEXP (e1, 1)))
27276 abort ();
27277 reg = REGNO (XEXP (e1, 0));
27278 offset = INTVAL (XEXP (e1, 1));
27279 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27280 HARD_FRAME_POINTER_REGNUM, reg,
27281 offset);
27282 }
27283 else if (REG_P (e1))
27284 {
27285 reg = REGNO (e1);
27286 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27287 HARD_FRAME_POINTER_REGNUM, reg);
27288 }
27289 else
27290 abort ();
27291 }
27292 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27293 {
27294 /* Move from sp to reg. */
27295 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27296 }
27297 else if (GET_CODE (e1) == PLUS
27298 && REG_P (XEXP (e1, 0))
27299 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27300 && CONST_INT_P (XEXP (e1, 1)))
27301 {
27302 /* Set reg to offset from sp. */
27303 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27304 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27305 }
27306 else
27307 abort ();
27308 break;
27309
27310 default:
27311 abort ();
27312 }
27313 }
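/* Illustrative examples of the cases handled above:  a single-register push
       str r4, [sp, #-4]!      ->  .save {r4}
   a stack adjustment
       sub sp, sp, #16         ->  .pad #16
   and establishing the frame pointer
       add fp, sp, #8          ->  .setfp fp, sp, #8  */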
27314
27315
27316 /* Emit unwind directives for the given insn. */
27317
27318 static void
27319 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27320 {
27321 rtx note, pat;
27322 bool handled_one = false;
27323
27324 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27325 return;
27326
27327 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27328 && (TREE_NOTHROW (current_function_decl)
27329 || crtl->all_throwers_are_sibcalls))
27330 return;
27331
27332 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27333 return;
27334
27335 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27336 {
27337 switch (REG_NOTE_KIND (note))
27338 {
27339 case REG_FRAME_RELATED_EXPR:
27340 pat = XEXP (note, 0);
27341 goto found;
27342
27343 case REG_CFA_REGISTER:
27344 pat = XEXP (note, 0);
27345 if (pat == NULL)
27346 {
27347 pat = PATTERN (insn);
27348 if (GET_CODE (pat) == PARALLEL)
27349 pat = XVECEXP (pat, 0, 0);
27350 }
27351
27352 /* Only emitted for IS_STACKALIGN re-alignment. */
27353 {
27354 rtx dest, src;
27355 unsigned reg;
27356
27357 src = SET_SRC (pat);
27358 dest = SET_DEST (pat);
27359
27360 gcc_assert (src == stack_pointer_rtx);
27361 reg = REGNO (dest);
27362 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27363 reg + 0x90, reg);
27364 }
27365 handled_one = true;
27366 break;
27367
27368 	 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27369 	    to get correct DWARF information for shrink-wrapping.  We should not
27370 emit unwind information for it because these are used either for
27371 pretend arguments or notes to adjust sp and restore registers from
27372 stack. */
27373 case REG_CFA_DEF_CFA:
27374 case REG_CFA_ADJUST_CFA:
27375 case REG_CFA_RESTORE:
27376 return;
27377
27378 case REG_CFA_EXPRESSION:
27379 case REG_CFA_OFFSET:
27380 /* ??? Only handling here what we actually emit. */
27381 gcc_unreachable ();
27382
27383 default:
27384 break;
27385 }
27386 }
27387 if (handled_one)
27388 return;
27389 pat = PATTERN (insn);
27390 found:
27391
27392 switch (GET_CODE (pat))
27393 {
27394 case SET:
27395 arm_unwind_emit_set (asm_out_file, pat);
27396 break;
27397
27398 case SEQUENCE:
27399 /* Store multiple. */
27400 arm_unwind_emit_sequence (asm_out_file, pat);
27401 break;
27402
27403 default:
27404 abort();
27405 }
27406 }
27407
27408
27409 /* Output a reference from a function exception table to the type_info
27410 object X. The EABI specifies that the symbol should be relocated by
27411 an R_ARM_TARGET2 relocation. */
27412
27413 static bool
27414 arm_output_ttype (rtx x)
27415 {
27416 fputs ("\t.word\t", asm_out_file);
27417 output_addr_const (asm_out_file, x);
27418 /* Use special relocations for symbol references. */
27419 if (!CONST_INT_P (x))
27420 fputs ("(TARGET2)", asm_out_file);
27421 fputc ('\n', asm_out_file);
27422
27423 return TRUE;
27424 }
27425
27426 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27427
27428 static void
27429 arm_asm_emit_except_personality (rtx personality)
27430 {
27431 fputs ("\t.personality\t", asm_out_file);
27432 output_addr_const (asm_out_file, personality);
27433 fputc ('\n', asm_out_file);
27434 }
27435 #endif /* ARM_UNWIND_INFO */
27436
27437 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27438
27439 static void
27440 arm_asm_init_sections (void)
27441 {
27442 #if ARM_UNWIND_INFO
27443 exception_section = get_unnamed_section (0, output_section_asm_op,
27444 "\t.handlerdata");
27445 #endif /* ARM_UNWIND_INFO */
27446
27447 #ifdef OBJECT_FORMAT_ELF
27448 if (target_pure_code)
27449 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27450 #endif
27451 }
27452
27453 /* Output unwind directives for the start/end of a function. */
27454
27455 void
27456 arm_output_fn_unwind (FILE * f, bool prologue)
27457 {
27458 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27459 return;
27460
27461 if (prologue)
27462 fputs ("\t.fnstart\n", f);
27463 else
27464 {
27465 /* If this function will never be unwound, then mark it as such.
27466 	 The same condition is used in arm_unwind_emit to suppress
27467 the frame annotations. */
27468 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27469 && (TREE_NOTHROW (current_function_decl)
27470 || crtl->all_throwers_are_sibcalls))
27471 fputs("\t.cantunwind\n", f);
27472
27473 fputs ("\t.fnend\n", f);
27474 }
27475 }
27476
27477 static bool
27478 arm_emit_tls_decoration (FILE *fp, rtx x)
27479 {
27480 enum tls_reloc reloc;
27481 rtx val;
27482
27483 val = XVECEXP (x, 0, 0);
27484 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27485
27486 output_addr_const (fp, val);
27487
27488 switch (reloc)
27489 {
27490 case TLS_GD32:
27491 fputs ("(tlsgd)", fp);
27492 break;
27493 case TLS_LDM32:
27494 fputs ("(tlsldm)", fp);
27495 break;
27496 case TLS_LDO32:
27497 fputs ("(tlsldo)", fp);
27498 break;
27499 case TLS_IE32:
27500 fputs ("(gottpoff)", fp);
27501 break;
27502 case TLS_LE32:
27503 fputs ("(tpoff)", fp);
27504 break;
27505 case TLS_DESCSEQ:
27506 fputs ("(tlsdesc)", fp);
27507 break;
27508 default:
27509 gcc_unreachable ();
27510 }
27511
27512 switch (reloc)
27513 {
27514 case TLS_GD32:
27515 case TLS_LDM32:
27516 case TLS_IE32:
27517 case TLS_DESCSEQ:
27518 fputs (" + (. - ", fp);
27519 output_addr_const (fp, XVECEXP (x, 0, 2));
27520 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27521 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27522 output_addr_const (fp, XVECEXP (x, 0, 3));
27523 fputc (')', fp);
27524 break;
27525 default:
27526 break;
27527 }
27528
27529 return TRUE;
27530 }
27531
27532 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27533
27534 static void
27535 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27536 {
27537 gcc_assert (size == 4);
27538 fputs ("\t.word\t", file);
27539 output_addr_const (file, x);
27540 fputs ("(tlsldo)", file);
27541 }
27542
27543 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27544
27545 static bool
27546 arm_output_addr_const_extra (FILE *fp, rtx x)
27547 {
27548 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27549 return arm_emit_tls_decoration (fp, x);
27550 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27551 {
27552 char label[256];
27553 int labelno = INTVAL (XVECEXP (x, 0, 0));
27554
27555 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27556 assemble_name_raw (fp, label);
27557
27558 return TRUE;
27559 }
27560 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27561 {
27562 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27563 if (GOT_PCREL)
27564 fputs ("+.", fp);
27565 fputs ("-(", fp);
27566 output_addr_const (fp, XVECEXP (x, 0, 0));
27567 fputc (')', fp);
27568 return TRUE;
27569 }
27570 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27571 {
27572 output_addr_const (fp, XVECEXP (x, 0, 0));
27573 if (GOT_PCREL)
27574 fputs ("+.", fp);
27575 fputs ("-(", fp);
27576 output_addr_const (fp, XVECEXP (x, 0, 1));
27577 fputc (')', fp);
27578 return TRUE;
27579 }
27580 else if (GET_CODE (x) == CONST_VECTOR)
27581 return arm_emit_vector_const (fp, x);
27582
27583 return FALSE;
27584 }
27585
27586 /* Output assembly for a shift instruction.
27587 SET_FLAGS determines how the instruction modifies the condition codes.
27588 0 - Do not set condition codes.
27589 1 - Set condition codes.
27590 2 - Use smallest instruction. */
27591 const char *
27592 arm_output_shift(rtx * operands, int set_flags)
27593 {
27594 char pattern[100];
27595 static const char flag_chars[3] = {'?', '.', '!'};
27596 const char *shift;
27597 HOST_WIDE_INT val;
27598 char c;
27599
27600 c = flag_chars[set_flags];
27601 shift = shift_op(operands[3], &val);
27602 if (shift)
27603 {
27604 if (val != -1)
27605 operands[2] = GEN_INT(val);
27606 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27607 }
27608 else
27609 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27610
27611 output_asm_insn (pattern, operands);
27612 return "";
27613 }
27614
27615 /* Output assembly for a WMMX immediate shift instruction. */
27616 const char *
27617 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27618 {
27619 int shift = INTVAL (operands[2]);
27620 char templ[50];
27621 machine_mode opmode = GET_MODE (operands[0]);
27622
27623 gcc_assert (shift >= 0);
27624
27625   /* Handle shift counts larger than the element width: > 63 (for the D
27626      qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
27627 if (((opmode == V4HImode) && (shift > 15))
27628 || ((opmode == V2SImode) && (shift > 31))
27629 || ((opmode == DImode) && (shift > 63)))
27630 {
27631 if (wror_or_wsra)
27632 {
27633 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27634 output_asm_insn (templ, operands);
27635 if (opmode == DImode)
27636 {
27637 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27638 output_asm_insn (templ, operands);
27639 }
27640 }
27641 else
27642 {
27643 /* The destination register will contain all zeros. */
27644 sprintf (templ, "wzero\t%%0");
27645 output_asm_insn (templ, operands);
27646 }
27647 return "";
27648 }
27649
27650 if ((opmode == DImode) && (shift > 32))
27651 {
27652 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27653 output_asm_insn (templ, operands);
27654 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27655 output_asm_insn (templ, operands);
27656 }
27657 else
27658 {
27659 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27660 output_asm_insn (templ, operands);
27661 }
27662 return "";
27663 }
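/* Worked example (illustrative): a DImode shift by 40 cannot be encoded in a
   single instruction, so it is emitted as a shift by 32 followed by a shift
   by 8 of the partial result; an out-of-range count becomes either wzero or,
   for WROR/WSRA, one or two shifts by 32.  */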
27664
27665 /* Output assembly for a WMMX tinsr instruction. */
27666 const char *
27667 arm_output_iwmmxt_tinsr (rtx *operands)
27668 {
27669 int mask = INTVAL (operands[3]);
27670 int i;
27671 char templ[50];
27672 int units = mode_nunits[GET_MODE (operands[0])];
27673 gcc_assert ((mask & (mask - 1)) == 0);
27674 for (i = 0; i < units; ++i)
27675 {
27676 if ((mask & 0x01) == 1)
27677 {
27678 break;
27679 }
27680 mask >>= 1;
27681 }
27682 gcc_assert (i < units);
27683 {
27684 switch (GET_MODE (operands[0]))
27685 {
27686 case E_V8QImode:
27687 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27688 break;
27689 case E_V4HImode:
27690 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27691 break;
27692 case E_V2SImode:
27693 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27694 break;
27695 default:
27696 gcc_unreachable ();
27697 break;
27698 }
27699 output_asm_insn (templ, operands);
27700 }
27701 return "";
27702 }
27703
27704 /* Output a Thumb-1 casesi dispatch sequence. */
27705 const char *
27706 thumb1_output_casesi (rtx *operands)
27707 {
27708 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27709
27710 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27711
27712 switch (GET_MODE(diff_vec))
27713 {
27714 case E_QImode:
27715 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27716 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27717 case E_HImode:
27718 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27719 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27720 case E_SImode:
27721 return "bl\t%___gnu_thumb1_case_si";
27722 default:
27723 gcc_unreachable ();
27724 }
27725 }
27726
27727 /* Output a Thumb-2 casesi instruction. */
27728 const char *
27729 thumb2_output_casesi (rtx *operands)
27730 {
27731 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27732
27733 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27734
27735 output_asm_insn ("cmp\t%0, %1", operands);
27736 output_asm_insn ("bhi\t%l3", operands);
27737 switch (GET_MODE(diff_vec))
27738 {
27739 case E_QImode:
27740 return "tbb\t[%|pc, %0]";
27741 case E_HImode:
27742 return "tbh\t[%|pc, %0, lsl #1]";
27743 case E_SImode:
27744 if (flag_pic)
27745 {
27746 output_asm_insn ("adr\t%4, %l2", operands);
27747 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27748 output_asm_insn ("add\t%4, %4, %5", operands);
27749 return "bx\t%4";
27750 }
27751 else
27752 {
27753 output_asm_insn ("adr\t%4, %l2", operands);
27754 return "ldr\t%|pc, [%4, %0, lsl #2]";
27755 }
27756 default:
27757 gcc_unreachable ();
27758 }
27759 }
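/* Illustrative expansion (register names are placeholders): for a HImode
   dispatch table the emitted sequence is roughly
       cmp     r0, r1
       bhi     .Ldefault
       tbh     [pc, r0, lsl #1]
   with tbb used for QImode tables and an adr/ldr (or adr/ldr/add/bx when
   generating PIC) sequence for SImode tables.  */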
27760
27761 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27762 per-core tuning structs. */
27763 static int
27764 arm_issue_rate (void)
27765 {
27766 return current_tune->issue_rate;
27767 }
27768
27769 /* Return how many instructions the scheduler should look ahead to choose the
27770    best one. */
27771 static int
27772 arm_first_cycle_multipass_dfa_lookahead (void)
27773 {
27774 int issue_rate = arm_issue_rate ();
27775
27776 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27777 }
27778
27779 /* Enable modeling of L2 auto-prefetcher. */
27780 static int
27781 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27782 {
27783 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27784 }
27785
27786 const char *
27787 arm_mangle_type (const_tree type)
27788 {
27789 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27790      has to be mangled as if it were in the "std" namespace. */
27791 if (TARGET_AAPCS_BASED
27792 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27793 return "St9__va_list";
27794
27795 /* Half-precision float. */
27796 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27797 return "Dh";
27798
27799   /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27800      built-in type. */
27801 if (TYPE_NAME (type) != NULL)
27802 return arm_mangle_builtin_type (type);
27803
27804 /* Use the default mangling. */
27805 return NULL;
27806 }
27807
27808 /* Order of allocation of core registers for Thumb: this allocation is
27809 written over the corresponding initial entries of the array
27810 initialized with REG_ALLOC_ORDER. We allocate all low registers
27811 first. Saving and restoring a low register is usually cheaper than
27812 using a call-clobbered high register. */
27813
27814 static const int thumb_core_reg_alloc_order[] =
27815 {
27816 3, 2, 1, 0, 4, 5, 6, 7,
27817 12, 14, 8, 9, 10, 11
27818 };
27819
27820 /* Adjust register allocation order when compiling for Thumb. */
27821
27822 void
27823 arm_order_regs_for_local_alloc (void)
27824 {
27825 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27826 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27827 if (TARGET_THUMB)
27828 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27829 sizeof (thumb_core_reg_alloc_order));
27830 }
27831
27832 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27833
27834 bool
27835 arm_frame_pointer_required (void)
27836 {
27837 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27838 return true;
27839
27840 /* If the function receives nonlocal gotos, it needs to save the frame
27841 pointer in the nonlocal_goto_save_area object. */
27842 if (cfun->has_nonlocal_label)
27843 return true;
27844
27845 /* The frame pointer is required for non-leaf APCS frames. */
27846 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27847 return true;
27848
27849 /* If we are probing the stack in the prologue, we will have a faulting
27850 instruction prior to the stack adjustment and this requires a frame
27851 pointer if we want to catch the exception using the EABI unwinder. */
27852 if (!IS_INTERRUPT (arm_current_func_type ())
27853 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27854 && arm_except_unwind_info (&global_options) == UI_TARGET
27855 && cfun->can_throw_non_call_exceptions)
27856 {
27857 HOST_WIDE_INT size = get_frame_size ();
27858
27859 /* That's irrelevant if there is no stack adjustment. */
27860 if (size <= 0)
27861 return false;
27862
27863 /* That's relevant only if there is a stack probe. */
27864 if (crtl->is_leaf && !cfun->calls_alloca)
27865 {
27866 /* We don't have the final size of the frame so adjust. */
27867 size += 32 * UNITS_PER_WORD;
27868 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27869 return true;
27870 }
27871 else
27872 return true;
27873 }
27874
27875 return false;
27876 }
27877
27878 /* Thumb-1 is the only target that lacks conditional execution, so return
27879    true unless compiling for Thumb-1. */
27880 static bool
27881 arm_have_conditional_execution (void)
27882 {
27883 return !TARGET_THUMB1;
27884 }
27885
27886 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27887 static HOST_WIDE_INT
27888 arm_vector_alignment (const_tree type)
27889 {
27890 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27891
27892 if (TARGET_AAPCS_BASED)
27893 align = MIN (align, 64);
27894
27895 return align;
27896 }
27897
27898 static unsigned int
27899 arm_autovectorize_vector_sizes (void)
27900 {
27901 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27902 }
27903
27904 static bool
27905 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27906 {
27907 /* Vectors which aren't in packed structures will not be less aligned than
27908 the natural alignment of their element type, so this is safe. */
27909 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27910 return !is_packed;
27911
27912 return default_builtin_vector_alignment_reachable (type, is_packed);
27913 }
27914
27915 static bool
27916 arm_builtin_support_vector_misalignment (machine_mode mode,
27917 const_tree type, int misalignment,
27918 bool is_packed)
27919 {
27920 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27921 {
27922 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27923
27924 if (is_packed)
27925 return align == 1;
27926
27927 /* If the misalignment is unknown, we should be able to handle the access
27928 so long as it is not to a member of a packed data structure. */
27929 if (misalignment == -1)
27930 return true;
27931
27932 /* Return true if the misalignment is a multiple of the natural alignment
27933 of the vector's element type. This is probably always going to be
27934 true in practice, since we've already established that this isn't a
27935 packed access. */
27936 return ((misalignment % align) == 0);
27937 }
27938
27939 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27940 is_packed);
27941 }
27942
27943 static void
27944 arm_conditional_register_usage (void)
27945 {
27946 int regno;
27947
27948 if (TARGET_THUMB1 && optimize_size)
27949 {
27950 /* When optimizing for size on Thumb-1, it's better not
27951 to use the HI regs, because of the overhead of
27952 stacking them. */
27953 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27954 fixed_regs[regno] = call_used_regs[regno] = 1;
27955 }
27956
27957 /* The link register can be clobbered by any branch insn,
27958 but we have no way to track that at present, so mark
27959 it as unavailable. */
27960 if (TARGET_THUMB1)
27961 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27962
27963 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27964 {
27965 /* VFPv3 registers are disabled when earlier VFP
27966 versions are selected due to the definition of
27967 LAST_VFP_REGNUM. */
27968 for (regno = FIRST_VFP_REGNUM;
27969 regno <= LAST_VFP_REGNUM; ++ regno)
27970 {
27971 fixed_regs[regno] = 0;
27972 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27973 || regno >= FIRST_VFP_REGNUM + 32;
27974 }
27975 }
27976
27977 if (TARGET_REALLY_IWMMXT)
27978 {
27979 regno = FIRST_IWMMXT_GR_REGNUM;
27980 /* The 2002/10/09 revision of the XScale ABI has wCG0
27981 and wCG1 as call-preserved registers. The 2002/11/21
27982 revision changed this so that all wCG registers are
27983 scratch registers. */
27984 for (regno = FIRST_IWMMXT_GR_REGNUM;
27985 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27986 fixed_regs[regno] = 0;
27987 /* The XScale ABI has wR0 - wR9 as scratch registers,
27988 the rest as call-preserved registers. */
27989 for (regno = FIRST_IWMMXT_REGNUM;
27990 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27991 {
27992 fixed_regs[regno] = 0;
27993 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27994 }
27995 }
27996
27997 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27998 {
27999 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28000 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28001 }
28002 else if (TARGET_APCS_STACK)
28003 {
28004 fixed_regs[10] = 1;
28005 call_used_regs[10] = 1;
28006 }
28007 /* -mcaller-super-interworking reserves r11 for calls to
28008 _interwork_r11_call_via_rN(). Making the register global
28009 is an easy way of ensuring that it remains valid for all
28010 calls. */
28011 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28012 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28013 {
28014 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28015 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28016 if (TARGET_CALLER_INTERWORKING)
28017 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28018 }
28019 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28020 }
28021
28022 static reg_class_t
28023 arm_preferred_rename_class (reg_class_t rclass)
28024 {
28025   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28026      using GENERAL_REGS.  We therefore prefer LO_REGS during the register
28027      rename pass, which can reduce code size. */
28028 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28029 return LO_REGS;
28030 else
28031 return NO_REGS;
28032 }
28033
28034 /* Compute the attribute "length" of insn "*push_multi".
28035 So this function MUST be kept in sync with that insn pattern. */
28036 int
28037 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28038 {
28039 int i, regno, hi_reg;
28040 int num_saves = XVECLEN (parallel_op, 0);
28041
28042 /* ARM mode. */
28043 if (TARGET_ARM)
28044 return 4;
28045 /* Thumb1 mode. */
28046 if (TARGET_THUMB1)
28047 return 2;
28048
28049 /* Thumb2 mode. */
28050 regno = REGNO (first_op);
28051   /* For PUSH/STM under Thumb-2, the 16-bit encodings can be used if the register
28052      list fits in 8 bits.  Normally this means all registers in the list must be
28053      LO_REGS, that is R0-R7.  If any HI_REGS register is used, the 32-bit
28054      encoding is required.  The one exception is PUSH, where LR (a HI_REGS
28055      register) is still allowed in the 16-bit encoding. */
28056 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28057 for (i = 1; i < num_saves && !hi_reg; i++)
28058 {
28059 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28060 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28061 }
28062
28063 if (!hi_reg)
28064 return 2;
28065 return 4;
28066 }
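/* Examples of the length computation (illustrative): in Thumb-2,
   "push {r0-r7, lr}" can use the 16-bit encoding and therefore has length 2,
   whereas "push {r4, r8}" needs the 32-bit encoding and has length 4.  */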
28067
28068 /* Compute the attribute "length" of insn. Currently, this function is used
28069 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28070 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28071    rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
28072    true if OPERANDS contains an insn which explicitly updates the base register. */
28073
28074 int
28075 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28076 {
28077 /* ARM mode. */
28078 if (TARGET_ARM)
28079 return 4;
28080 /* Thumb1 mode. */
28081 if (TARGET_THUMB1)
28082 return 2;
28083
28084 rtx parallel_op = operands[0];
28085   /* Initialize to the number of elements in the PARALLEL. */
28086 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28087   /* Initialize REGNO to the base register. */
28088 unsigned regno = REGNO (operands[1]);
28089 /* Skip return and write back pattern.
28090 We only need register pop pattern for later analysis. */
28091 unsigned first_indx = 0;
28092 first_indx += return_pc ? 1 : 0;
28093 first_indx += write_back_p ? 1 : 0;
28094
28095   /* A pop operation can be done through LDM or POP.  If the base register is SP
28096      and write back is used, then an LDM is an alias of POP. */
28097 bool pop_p = (regno == SP_REGNUM && write_back_p);
28098 bool ldm_p = !pop_p;
28099
28100 /* Check base register for LDM. */
28101 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28102 return 4;
28103
28104 /* Check each register in the list. */
28105 for (; indx >= first_indx; indx--)
28106 {
28107 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28108 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28109 comment in arm_attr_length_push_multi. */
28110 if (REGNO_REG_CLASS (regno) == HI_REGS
28111 && (regno != PC_REGNUM || ldm_p))
28112 return 4;
28113 }
28114
28115 return 2;
28116 }
28117
28118 /* Compute the number of instructions emitted by output_move_double. */
28119 int
28120 arm_count_output_move_double_insns (rtx *operands)
28121 {
28122 int count;
28123 rtx ops[2];
28124 /* output_move_double may modify the operands array, so call it
28125 here on a copy of the array. */
28126 ops[0] = operands[0];
28127 ops[1] = operands[1];
28128 output_move_double (ops, false, &count);
28129 return count;
28130 }
28131
28132 int
28133 vfp3_const_double_for_fract_bits (rtx operand)
28134 {
28135 REAL_VALUE_TYPE r0;
28136
28137 if (!CONST_DOUBLE_P (operand))
28138 return 0;
28139
28140 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28141 if (exact_real_inverse (DFmode, &r0)
28142 && !REAL_VALUE_NEGATIVE (r0))
28143 {
28144 if (exact_real_truncate (DFmode, &r0))
28145 {
28146 HOST_WIDE_INT value = real_to_integer (&r0);
28147 value = value & 0xffffffff;
28148 if ((value != 0) && ( (value & (value - 1)) == 0))
28149 {
28150 int ret = exact_log2 (value);
28151 gcc_assert (IN_RANGE (ret, 0, 31));
28152 return ret;
28153 }
28154 }
28155 }
28156 return 0;
28157 }
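/* Worked example (added for illustration): for the constant 0.25 the exact
   inverse is 4.0, whose integer value is a power of two, so the function
   returns log2(4) = 2; for a constant such as 0.3, which has no exact
   power-of-two inverse, it returns 0.  */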
28158
28159 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28160 log2 is in [1, 32], return that log2. Otherwise return -1.
28161 This is used in the patterns for vcvt.s32.f32 floating-point to
28162 fixed-point conversions. */
28163
28164 int
28165 vfp3_const_double_for_bits (rtx x)
28166 {
28167 const REAL_VALUE_TYPE *r;
28168
28169 if (!CONST_DOUBLE_P (x))
28170 return -1;
28171
28172 r = CONST_DOUBLE_REAL_VALUE (x);
28173
28174 if (REAL_VALUE_NEGATIVE (*r)
28175 || REAL_VALUE_ISNAN (*r)
28176 || REAL_VALUE_ISINF (*r)
28177 || !real_isinteger (r, SFmode))
28178 return -1;
28179
28180 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28181
28182 /* The exact_log2 above will have returned -1 if this is
28183 not an exact log2. */
28184 if (!IN_RANGE (hwint, 1, 32))
28185 return -1;
28186
28187 return hwint;
28188 }
28189
28190 \f
28191 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28192
28193 static void
28194 arm_pre_atomic_barrier (enum memmodel model)
28195 {
28196 if (need_atomic_barrier_p (model, true))
28197 emit_insn (gen_memory_barrier ());
28198 }
28199
28200 static void
28201 arm_post_atomic_barrier (enum memmodel model)
28202 {
28203 if (need_atomic_barrier_p (model, false))
28204 emit_insn (gen_memory_barrier ());
28205 }
28206
28207 /* Emit the load-exclusive and store-exclusive instructions.
28208 Use acquire and release versions if necessary. */
28209
28210 static void
28211 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28212 {
28213 rtx (*gen) (rtx, rtx);
28214
28215 if (acq)
28216 {
28217 switch (mode)
28218 {
28219 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28220 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28221 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28222 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28223 default:
28224 gcc_unreachable ();
28225 }
28226 }
28227 else
28228 {
28229 switch (mode)
28230 {
28231 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28232 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28233 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28234 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28235 default:
28236 gcc_unreachable ();
28237 }
28238 }
28239
28240 emit_insn (gen (rval, mem));
28241 }
28242
28243 static void
28244 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28245 rtx mem, bool rel)
28246 {
28247 rtx (*gen) (rtx, rtx, rtx);
28248
28249 if (rel)
28250 {
28251 switch (mode)
28252 {
28253 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28254 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28255 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28256 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28257 default:
28258 gcc_unreachable ();
28259 }
28260 }
28261 else
28262 {
28263 switch (mode)
28264 {
28265 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28266 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28267 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28268 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28269 default:
28270 gcc_unreachable ();
28271 }
28272 }
28273
28274 emit_insn (gen (bval, rval, mem));
28275 }
28276
28277 /* Mark the previous jump instruction as unlikely. */
28278
28279 static void
28280 emit_unlikely_jump (rtx insn)
28281 {
28282 rtx_insn *jump = emit_jump_insn (insn);
28283 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28284 }
28285
28286 /* Expand a compare and swap pattern. */
28287
28288 void
28289 arm_expand_compare_and_swap (rtx operands[])
28290 {
28291 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28292 machine_mode mode;
28293 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28294
28295 bval = operands[0];
28296 rval = operands[1];
28297 mem = operands[2];
28298 oldval = operands[3];
28299 newval = operands[4];
28300 is_weak = operands[5];
28301 mod_s = operands[6];
28302 mod_f = operands[7];
28303 mode = GET_MODE (mem);
28304
28305 /* Normally the succ memory model must be stronger than fail, but in the
28306 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28307 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28308
28309 if (TARGET_HAVE_LDACQ
28310 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28311 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28312 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28313
28314 switch (mode)
28315 {
28316 case E_QImode:
28317 case E_HImode:
28318 /* For narrow modes, we're going to perform the comparison in SImode,
28319 so do the zero-extension now. */
28320 rval = gen_reg_rtx (SImode);
28321 oldval = convert_modes (SImode, mode, oldval, true);
28322 /* FALLTHRU */
28323
28324 case E_SImode:
28325 /* Force the value into a register if needed. We waited until after
28326 the zero-extension above to do this properly. */
28327 if (!arm_add_operand (oldval, SImode))
28328 oldval = force_reg (SImode, oldval);
28329 break;
28330
28331 case E_DImode:
28332 if (!cmpdi_operand (oldval, mode))
28333 oldval = force_reg (mode, oldval);
28334 break;
28335
28336 default:
28337 gcc_unreachable ();
28338 }
28339
28340 if (TARGET_THUMB1)
28341 {
28342 switch (mode)
28343 {
28344 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28345 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28346 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28347 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28348 default:
28349 gcc_unreachable ();
28350 }
28351 }
28352 else
28353 {
28354 switch (mode)
28355 {
28356 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28357 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28358 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28359 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28360 default:
28361 gcc_unreachable ();
28362 }
28363 }
28364
28365 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28366 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28367
28368 if (mode == QImode || mode == HImode)
28369 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28370
28371 /* In all cases, we arrange for success to be signaled by Z set.
28372 This arrangement allows for the boolean result to be used directly
28373 in a subsequent branch, post optimization. For Thumb-1 targets, the
28374      boolean negation of the result is also stored in bval because the Thumb-1
28375      backend lacks dependency tracking for the CC flag, as flag-setting is not
28376      represented at the RTL level. */
28377 if (TARGET_THUMB1)
28378 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28379 else
28380 {
28381 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28382 emit_insn (gen_rtx_SET (bval, x));
28383 }
28384 }
28385
28386 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28387 another memory store between the load-exclusive and store-exclusive can
28388 reset the monitor from Exclusive to Open state. This means we must wait
28389 until after reload to split the pattern, lest we get a register spill in
28390    the middle of the atomic sequence.  Success of the compare and swap is
28391    indicated by the Z flag being set for 32-bit targets and by neg_bval being
28392    zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
28393    the atomic_compare_and_swapmode standard pattern in operand 0). */
28394
28395 void
28396 arm_split_compare_and_swap (rtx operands[])
28397 {
28398 rtx rval, mem, oldval, newval, neg_bval;
28399 machine_mode mode;
28400 enum memmodel mod_s, mod_f;
28401 bool is_weak;
28402 rtx_code_label *label1, *label2;
28403 rtx x, cond;
28404
28405 rval = operands[1];
28406 mem = operands[2];
28407 oldval = operands[3];
28408 newval = operands[4];
28409 is_weak = (operands[5] != const0_rtx);
28410 mod_s = memmodel_from_int (INTVAL (operands[6]));
28411 mod_f = memmodel_from_int (INTVAL (operands[7]));
28412 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28413 mode = GET_MODE (mem);
28414
28415 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28416
28417 bool use_acquire = TARGET_HAVE_LDACQ
28418 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28419 || is_mm_release (mod_s));
28420
28421 bool use_release = TARGET_HAVE_LDACQ
28422 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28423 || is_mm_acquire (mod_s));
28424
28425 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28426 a full barrier is emitted after the store-release. */
28427 if (is_armv8_sync)
28428 use_acquire = false;
28429
28430 /* Checks whether a barrier is needed and emits one accordingly. */
28431 if (!(use_acquire || use_release))
28432 arm_pre_atomic_barrier (mod_s);
28433
28434 label1 = NULL;
28435 if (!is_weak)
28436 {
28437 label1 = gen_label_rtx ();
28438 emit_label (label1);
28439 }
28440 label2 = gen_label_rtx ();
28441
28442 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28443
28444 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28445 as required to communicate with arm_expand_compare_and_swap. */
28446 if (TARGET_32BIT)
28447 {
28448 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28449 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28450 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28451 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28452 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28453 }
28454 else
28455 {
28456 emit_move_insn (neg_bval, const1_rtx);
28457 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28458 if (thumb1_cmpneg_operand (oldval, SImode))
28459 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28460 label2, cond));
28461 else
28462 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28463 }
28464
28465 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28466
28467 /* Weak or strong, we want EQ to be true for success, so that we
28468 match the flags that we got from the compare above. */
28469 if (TARGET_32BIT)
28470 {
28471 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28472 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28473 emit_insn (gen_rtx_SET (cond, x));
28474 }
28475
28476 if (!is_weak)
28477 {
28478 /* Z is set to boolean value of !neg_bval, as required to communicate
28479 with arm_expand_compare_and_swap. */
28480 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28481 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28482 }
28483
28484 if (!is_mm_relaxed (mod_f))
28485 emit_label (label2);
28486
28487 /* Checks whether a barrier is needed and emits one accordingly. */
28488 if (is_armv8_sync
28489 || !(use_acquire || use_release))
28490 arm_post_atomic_barrier (mod_s);
28491
28492 if (is_mm_relaxed (mod_f))
28493 emit_label (label2);
28494 }
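/* Schematic form of the code produced by the split above (illustrative only;
   strong CAS on a 32-bit target, ignoring barriers and the acquire/release
   variants):
       .Lretry:
           ldrex   rval, [mem]
           cmp     rval, oldval
           bne     .Ldone          @ Z clear signals failure
           strex   neg_bval, newval, [mem]
           cmp     neg_bval, #0
           bne     .Lretry         @ lost the exclusive monitor, try again
       .Ldone:
   The weak form omits the retry loop.  */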
28495
28496 /* Split an atomic operation pattern. Operation is given by CODE and is one
28497 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28498 operation). Operation is performed on the content at MEM and on VALUE
28499 following the memory model MODEL_RTX. The content at MEM before and after
28500 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28501 success of the operation is returned in COND. Using a scratch register or
28502 an operand register for these determines what result is returned for that
28503 pattern. */
28504
28505 void
28506 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28507 rtx value, rtx model_rtx, rtx cond)
28508 {
28509 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28510 machine_mode mode = GET_MODE (mem);
28511 machine_mode wmode = (mode == DImode ? DImode : SImode);
28512 rtx_code_label *label;
28513 bool all_low_regs, bind_old_new;
28514 rtx x;
28515
28516 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28517
28518 bool use_acquire = TARGET_HAVE_LDACQ
28519 && !(is_mm_relaxed (model) || is_mm_consume (model)
28520 || is_mm_release (model));
28521
28522 bool use_release = TARGET_HAVE_LDACQ
28523 && !(is_mm_relaxed (model) || is_mm_consume (model)
28524 || is_mm_acquire (model));
28525
28526 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28527 a full barrier is emitted after the store-release. */
28528 if (is_armv8_sync)
28529 use_acquire = false;
28530
28531 /* Checks whether a barrier is needed and emits one accordingly. */
28532 if (!(use_acquire || use_release))
28533 arm_pre_atomic_barrier (model);
28534
28535 label = gen_label_rtx ();
28536 emit_label (label);
28537
28538 if (new_out)
28539 new_out = gen_lowpart (wmode, new_out);
28540 if (old_out)
28541 old_out = gen_lowpart (wmode, old_out);
28542 else
28543 old_out = new_out;
28544 value = simplify_gen_subreg (wmode, value, mode, 0);
28545
28546 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28547
28548 /* Does the operation require destination and first operand to use the same
28549 register? This is decided by register constraints of relevant insn
28550 patterns in thumb1.md. */
28551 gcc_assert (!new_out || REG_P (new_out));
28552 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28553 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28554 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28555 bind_old_new =
28556 (TARGET_THUMB1
28557 && code != SET
28558 && code != MINUS
28559 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28560
28561 /* We want to return the old value while putting the result of the operation
28562 in the same register as the old value so copy the old value over to the
28563 destination register and use that register for the operation. */
28564 if (old_out && bind_old_new)
28565 {
28566 emit_move_insn (new_out, old_out);
28567 old_out = new_out;
28568 }
28569
28570 switch (code)
28571 {
28572 case SET:
28573 new_out = value;
28574 break;
28575
28576 case NOT:
28577 x = gen_rtx_AND (wmode, old_out, value);
28578 emit_insn (gen_rtx_SET (new_out, x));
28579 x = gen_rtx_NOT (wmode, new_out);
28580 emit_insn (gen_rtx_SET (new_out, x));
28581 break;
28582
28583 case MINUS:
28584 if (CONST_INT_P (value))
28585 {
28586 value = GEN_INT (-INTVAL (value));
28587 code = PLUS;
28588 }
28589 /* FALLTHRU */
28590
28591 case PLUS:
28592 if (mode == DImode)
28593 {
28594 /* DImode plus/minus need to clobber flags. */
28595 /* The adddi3 and subdi3 patterns are incorrectly written so that
28596 they require matching operands, even when we could easily support
28597 three operands. Thankfully, this can be fixed up post-splitting,
28598 as the individual add+adc patterns do accept three operands and
28599 post-reload cprop can make these moves go away. */
28600 emit_move_insn (new_out, old_out);
28601 if (code == PLUS)
28602 x = gen_adddi3 (new_out, new_out, value);
28603 else
28604 x = gen_subdi3 (new_out, new_out, value);
28605 emit_insn (x);
28606 break;
28607 }
28608 /* FALLTHRU */
28609
28610 default:
28611 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28612 emit_insn (gen_rtx_SET (new_out, x));
28613 break;
28614 }
28615
28616 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28617 use_release);
28618
28619 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28620 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28621
28622 /* Checks whether a barrier is needed and emits one accordingly. */
28623 if (is_armv8_sync
28624 || !(use_acquire || use_release))
28625 arm_post_atomic_barrier (model);
28626 }
28627 \f
28628 #define MAX_VECT_LEN 16
28629
28630 struct expand_vec_perm_d
28631 {
28632 rtx target, op0, op1;
28633 unsigned char perm[MAX_VECT_LEN];
28634 machine_mode vmode;
28635 unsigned char nelt;
28636 bool one_vector_p;
28637 bool testing_p;
28638 };
28639
28640 /* Generate a variable permutation. */
28641
28642 static void
28643 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28644 {
28645 machine_mode vmode = GET_MODE (target);
28646 bool one_vector_p = rtx_equal_p (op0, op1);
28647
28648 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28649 gcc_checking_assert (GET_MODE (op0) == vmode);
28650 gcc_checking_assert (GET_MODE (op1) == vmode);
28651 gcc_checking_assert (GET_MODE (sel) == vmode);
28652 gcc_checking_assert (TARGET_NEON);
28653
28654 if (one_vector_p)
28655 {
28656 if (vmode == V8QImode)
28657 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28658 else
28659 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28660 }
28661 else
28662 {
28663 rtx pair;
28664
28665 if (vmode == V8QImode)
28666 {
28667 pair = gen_reg_rtx (V16QImode);
28668 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28669 pair = gen_lowpart (TImode, pair);
28670 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28671 }
28672 else
28673 {
28674 pair = gen_reg_rtx (OImode);
28675 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28676 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28677 }
28678 }
28679 }
28680
28681 void
28682 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28683 {
28684 machine_mode vmode = GET_MODE (target);
28685 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28686 bool one_vector_p = rtx_equal_p (op0, op1);
28687 rtx rmask[MAX_VECT_LEN], mask;
28688
28689 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28690 numbering of elements for big-endian, we must reverse the order. */
28691 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28692
28693 /* The VTBL instruction does not use a modulo index, so we must take care
28694 of that ourselves. */
28695 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28696 for (i = 0; i < nelt; ++i)
28697 rmask[i] = mask;
28698 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28699 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28700
28701 arm_expand_vec_perm_1 (target, op0, op1, sel);
28702 }
28703
28704 /* Map lane ordering between architectural lane order, and GCC lane order,
28705 taking into account ABI. See comment above output_move_neon for details. */
28706
28707 static int
28708 neon_endian_lane_map (machine_mode mode, int lane)
28709 {
28710 if (BYTES_BIG_ENDIAN)
28711 {
28712 int nelems = GET_MODE_NUNITS (mode);
28713 /* Reverse lane order. */
28714 lane = (nelems - 1 - lane);
28715 /* Reverse D register order, to match ABI. */
28716 if (GET_MODE_SIZE (mode) == 16)
28717 lane = lane ^ (nelems / 2);
28718 }
28719 return lane;
28720 }
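/* Worked example of the mapping above (illustrative): for V4SImode on a
   big-endian target, lanes 0, 1, 2, 3 map to 1, 0, 3, 2; the lane order is
   first reversed to 3, 2, 1, 0 and the two D-register halves are then swapped
   by XOR-ing with nelems / 2 = 2.  */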
28721
28722 /* Some permutations index into pairs of vectors, this is a helper function
28723 to map indexes into those pairs of vectors. */
28724
28725 static int
28726 neon_pair_endian_lane_map (machine_mode mode, int lane)
28727 {
28728 int nelem = GET_MODE_NUNITS (mode);
28729 if (BYTES_BIG_ENDIAN)
28730 lane =
28731 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28732 return lane;
28733 }
28734
28735 /* Generate or test for an insn that supports a constant permutation. */
28736
28737 /* Recognize patterns for the VUZP insns. */
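/* For example, on a little-endian target a two-operand V8QImode permutation
   with perm[] = { 0, 2, 4, 6, 8, 10, 12, 14 } (the even lanes of the
   concatenated inputs) is matched here and emitted as a single VUZP. */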
28738
28739 static bool
28740 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28741 {
28742 unsigned int i, odd, mask, nelt = d->nelt;
28743 rtx out0, out1, in0, in1;
28744 rtx (*gen)(rtx, rtx, rtx, rtx);
28745 int first_elem;
28746 int swap_nelt;
28747
28748 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28749 return false;
28750
28751 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28752 big endian pattern on 64 bit vectors, so we correct for that. */
28753 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28754 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28755
28756 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28757
28758 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28759 odd = 0;
28760 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28761 odd = 1;
28762 else
28763 return false;
28764 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28765
28766 for (i = 0; i < nelt; i++)
28767 {
28768 unsigned elt =
28769 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28770 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28771 return false;
28772 }
28773
28774 /* Success! */
28775 if (d->testing_p)
28776 return true;
28777
28778 switch (d->vmode)
28779 {
28780 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28781 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28782 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28783 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28784 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28785 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28786 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28787 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28788 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28789 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28790 default:
28791 gcc_unreachable ();
28792 }
28793
28794 in0 = d->op0;
28795 in1 = d->op1;
28796 if (swap_nelt != 0)
28797 std::swap (in0, in1);
28798
28799 out0 = d->target;
28800 out1 = gen_reg_rtx (d->vmode);
28801 if (odd)
28802 std::swap (out0, out1);
28803
28804 emit_insn (gen (out0, in0, in1, out1));
28805 return true;
28806 }
28807
28808 /* Recognize patterns for the VZIP insns. */
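/* For example, on a little-endian target a two-operand V8QImode permutation
   with perm[] = { 0, 8, 1, 9, 2, 10, 3, 11 } (interleaving the low halves
   of the two inputs) is matched here and emitted as a single VZIP. */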
28809
28810 static bool
28811 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28812 {
28813 unsigned int i, high, mask, nelt = d->nelt;
28814 rtx out0, out1, in0, in1;
28815 rtx (*gen)(rtx, rtx, rtx, rtx);
28816 int first_elem;
28817 bool is_swapped;
28818
28819 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28820 return false;
28821
28822 is_swapped = BYTES_BIG_ENDIAN;
28823
28824 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28825
28826 high = nelt / 2;
28827 if (first_elem == neon_endian_lane_map (d->vmode, high))
28828 ;
28829 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28830 high = 0;
28831 else
28832 return false;
28833 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28834
28835 for (i = 0; i < nelt / 2; i++)
28836 {
28837 unsigned elt =
28838 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28839 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28840 != elt)
28841 return false;
28842 elt =
28843 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28844 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28845 != elt)
28846 return false;
28847 }
28848
28849 /* Success! */
28850 if (d->testing_p)
28851 return true;
28852
28853 switch (d->vmode)
28854 {
28855 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28856 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28857 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28858 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28859 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28860 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28861 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28862 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28863 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28864 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28865 default:
28866 gcc_unreachable ();
28867 }
28868
28869 in0 = d->op0;
28870 in1 = d->op1;
28871 if (is_swapped)
28872 std::swap (in0, in1);
28873
28874 out0 = d->target;
28875 out1 = gen_reg_rtx (d->vmode);
28876 if (high)
28877 std::swap (out0, out1);
28878
28879 emit_insn (gen (out0, in0, in1, out1));
28880 return true;
28881 }
28882
28883 /* Recognize patterns for the VREV insns. */
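/* For example, a one-operand V8QImode permutation with
   perm[] = { 1, 0, 3, 2, 5, 4, 7, 6 } (diff == 1, swapping the bytes within
   each halfword) is matched here and emitted as VREV16. */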
28884
28885 static bool
28886 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28887 {
28888 unsigned int i, j, diff, nelt = d->nelt;
28889 rtx (*gen)(rtx, rtx);
28890
28891 if (!d->one_vector_p)
28892 return false;
28893
28894 diff = d->perm[0];
28895 switch (diff)
28896 {
28897 case 7:
28898 switch (d->vmode)
28899 {
28900 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28901 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28902 default:
28903 return false;
28904 }
28905 break;
28906 case 3:
28907 switch (d->vmode)
28908 {
28909 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28910 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28911 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28912 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28913 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28914 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28915 default:
28916 return false;
28917 }
28918 break;
28919 case 1:
28920 switch (d->vmode)
28921 {
28922 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28923 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28924 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28925 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28926 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28927 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28928 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28929 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28930 default:
28931 return false;
28932 }
28933 break;
28934 default:
28935 return false;
28936 }
28937
28938 for (i = 0; i < nelt ; i += diff + 1)
28939 for (j = 0; j <= diff; j += 1)
28940 {
28941 /* This is guaranteed to be true, as the value of diff
28942 is 7, 3 or 1 and we should have enough elements in the
28943 queue to generate this. Getting a vector mask with a
28944 value of diff other than these implies that
28945 something has gone wrong by the time we get here. */
28946 gcc_assert (i + j < nelt);
28947 if (d->perm[i + j] != i + diff - j)
28948 return false;
28949 }
28950
28951 /* Success! */
28952 if (d->testing_p)
28953 return true;
28954
28955 emit_insn (gen (d->target, d->op0));
28956 return true;
28957 }
28958
28959 /* Recognize patterns for the VTRN insns. */
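/* For example, on a little-endian target a two-operand V8QImode permutation
   with perm[] = { 0, 8, 2, 10, 4, 12, 6, 14 } (pairing the even lanes of the
   first input with the even lanes of the second) is matched here and emitted
   as a single VTRN. */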
28960
28961 static bool
28962 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28963 {
28964 unsigned int i, odd, mask, nelt = d->nelt;
28965 rtx out0, out1, in0, in1;
28966 rtx (*gen)(rtx, rtx, rtx, rtx);
28967
28968 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28969 return false;
28970
28971 /* Note that these are little-endian tests. Adjust for big-endian later. */
28972 if (d->perm[0] == 0)
28973 odd = 0;
28974 else if (d->perm[0] == 1)
28975 odd = 1;
28976 else
28977 return false;
28978 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28979
28980 for (i = 0; i < nelt; i += 2)
28981 {
28982 if (d->perm[i] != i + odd)
28983 return false;
28984 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28985 return false;
28986 }
28987
28988 /* Success! */
28989 if (d->testing_p)
28990 return true;
28991
28992 switch (d->vmode)
28993 {
28994 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28995 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28996 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28997 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28998 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28999 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29000 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29001 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29002 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29003 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29004 default:
29005 gcc_unreachable ();
29006 }
29007
29008 in0 = d->op0;
29009 in1 = d->op1;
29010 if (BYTES_BIG_ENDIAN)
29011 {
29012 std::swap (in0, in1);
29013 odd = !odd;
29014 }
29015
29016 out0 = d->target;
29017 out1 = gen_reg_rtx (d->vmode);
29018 if (odd)
29019 std::swap (out0, out1);
29020
29021 emit_insn (gen (out0, in0, in1, out1));
29022 return true;
29023 }
29024
29025 /* Recognize patterns for the VEXT insns. */
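/* For example, on a little-endian target a two-operand V8QImode permutation
   with perm[] = { 3, 4, 5, 6, 7, 8, 9, 10 } (consecutive lanes of the
   concatenated inputs starting at index 3) is matched here and emitted as
   VEXT with an offset of 3. */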
29026
29027 static bool
29028 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29029 {
29030 unsigned int i, nelt = d->nelt;
29031 rtx (*gen) (rtx, rtx, rtx, rtx);
29032 rtx offset;
29033
29034 unsigned int location;
29035
29036 unsigned int next = d->perm[0] + 1;
29037
29038 /* TODO: Handle GCC's numbering of elements for big-endian. */
29039 if (BYTES_BIG_ENDIAN)
29040 return false;
29041
29042 /* Check if the extracted indexes are increasing by one. */
29043 for (i = 1; i < nelt; next++, i++)
29044 {
29045 /* If we hit the most significant element of the 2nd vector in
29046 the previous iteration, no need to test further. */
29047 if (next == 2 * nelt)
29048 return false;
29049
29050 /* If we are operating on only one vector: it could be a
29051 rotation. If there are only two elements of size < 64, let
29052 arm_evpc_neon_vrev catch it. */
29053 if (d->one_vector_p && (next == nelt))
29054 {
29055 if ((nelt == 2) && (d->vmode != V2DImode))
29056 return false;
29057 else
29058 next = 0;
29059 }
29060
29061 if (d->perm[i] != next)
29062 return false;
29063 }
29064
29065 location = d->perm[0];
29066
29067 switch (d->vmode)
29068 {
29069 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29070 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29071 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29072 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29073 case E_V2SImode: gen = gen_neon_vextv2si; break;
29074 case E_V4SImode: gen = gen_neon_vextv4si; break;
29075 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29076 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29077 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29078 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29079 case E_V2DImode: gen = gen_neon_vextv2di; break;
29080 default:
29081 return false;
29082 }
29083
29084 /* Success! */
29085 if (d->testing_p)
29086 return true;
29087
29088 offset = GEN_INT (location);
29089 emit_insn (gen (d->target, d->op0, d->op1, offset));
29090 return true;
29091 }
29092
29093 /* The NEON VTBL instruction is a fully variable permutation that's even
29094 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29095 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29096 can do slightly better by expanding this as a constant where we don't
29097 have to apply a mask. */
29098
29099 static bool
29100 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29101 {
29102 rtx rperm[MAX_VECT_LEN], sel;
29103 machine_mode vmode = d->vmode;
29104 unsigned int i, nelt = d->nelt;
29105
29106 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29107 numbering of elements for big-endian, we must reverse the order. */
29108 if (BYTES_BIG_ENDIAN)
29109 return false;
29110
29111 if (d->testing_p)
29112 return true;
29113
29114 /* Generic code will try constant permutation twice. Once with the
29115 original mode and again with the elements lowered to QImode.
29116 So wait and don't do the selector expansion ourselves. */
29117 if (vmode != V8QImode && vmode != V16QImode)
29118 return false;
29119
29120 for (i = 0; i < nelt; ++i)
29121 rperm[i] = GEN_INT (d->perm[i]);
29122 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29123 sel = force_reg (vmode, sel);
29124
29125 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29126 return true;
29127 }
29128
29129 static bool
29130 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29131 {
29132 /* Check if the input mask matches vext before reordering the
29133 operands. */
29134 if (TARGET_NEON)
29135 if (arm_evpc_neon_vext (d))
29136 return true;
29137
29138 /* The pattern matching functions above are written to look for a small
29139 number to begin the sequence (0, 1, N/2). If we begin with an index
29140 from the second operand, we can swap the operands. */
29141 if (d->perm[0] >= d->nelt)
29142 {
29143 unsigned i, nelt = d->nelt;
29144
29145 for (i = 0; i < nelt; ++i)
29146 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29147
29148 std::swap (d->op0, d->op1);
29149 }
29150
29151 if (TARGET_NEON)
29152 {
29153 if (arm_evpc_neon_vuzp (d))
29154 return true;
29155 if (arm_evpc_neon_vzip (d))
29156 return true;
29157 if (arm_evpc_neon_vrev (d))
29158 return true;
29159 if (arm_evpc_neon_vtrn (d))
29160 return true;
29161 return arm_evpc_neon_vtbl (d);
29162 }
29163 return false;
29164 }
29165
29166 /* Expand a vec_perm_const pattern. */
29167
29168 bool
29169 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29170 {
29171 struct expand_vec_perm_d d;
29172 int i, nelt, which;
29173
29174 d.target = target;
29175 d.op0 = op0;
29176 d.op1 = op1;
29177
29178 d.vmode = GET_MODE (target);
29179 gcc_assert (VECTOR_MODE_P (d.vmode));
29180 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29181 d.testing_p = false;
29182
29183 for (i = which = 0; i < nelt; ++i)
29184 {
29185 rtx e = XVECEXP (sel, 0, i);
29186 int ei = INTVAL (e) & (2 * nelt - 1);
29187 which |= (ei < nelt ? 1 : 2);
29188 d.perm[i] = ei;
29189 }
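/* WHICH is now a two-bit mask: bit 0 is set if any selector element refers
   to OP0 and bit 1 if any refers to OP1. For instance, a V4SImode selector
   of { 4, 5, 6, 7 } only references OP1 and gives WHICH == 2. */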
29190
29191 switch (which)
29192 {
29193 default:
29194 gcc_unreachable();
29195
29196 case 3:
29197 d.one_vector_p = false;
29198 if (!rtx_equal_p (op0, op1))
29199 break;
29200
29201 /* The elements of PERM do not suggest that only the first operand
29202 is used, but both operands are identical. Allow easier matching
29203 of the permutation by folding the permutation into the single
29204 input vector. */
29205 /* FALLTHRU */
29206 case 2:
29207 for (i = 0; i < nelt; ++i)
29208 d.perm[i] &= nelt - 1;
29209 d.op0 = op1;
29210 d.one_vector_p = true;
29211 break;
29212
29213 case 1:
29214 d.op1 = op0;
29215 d.one_vector_p = true;
29216 break;
29217 }
29218
29219 return arm_expand_vec_perm_const_1 (&d);
29220 }
29221
29222 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29223
29224 static bool
29225 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29226 const unsigned char *sel)
29227 {
29228 struct expand_vec_perm_d d;
29229 unsigned int i, nelt, which;
29230 bool ret;
29231
29232 d.vmode = vmode;
29233 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29234 d.testing_p = true;
29235 memcpy (d.perm, sel, nelt);
29236
29237 /* Categorize the set of elements in the selector. */
29238 for (i = which = 0; i < nelt; ++i)
29239 {
29240 unsigned char e = d.perm[i];
29241 gcc_assert (e < 2 * nelt);
29242 which |= (e < nelt ? 1 : 2);
29243 }
29244
29245 /* For all elements from second vector, fold the elements to first. */
29246 if (which == 2)
29247 for (i = 0; i < nelt; ++i)
29248 d.perm[i] -= nelt;
29249
29250 /* Check whether the mask can be applied to the vector type. */
29251 d.one_vector_p = (which != 3);
29252
29253 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29254 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29255 if (!d.one_vector_p)
29256 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29257
29258 start_sequence ();
29259 ret = arm_expand_vec_perm_const_1 (&d);
29260 end_sequence ();
29261
29262 return ret;
29263 }
29264
29265 bool
29266 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29267 {
29268 /* If we are soft float and we either have ldrd or the mode is no
29269 wider than a word, then all auto-increment forms are ok. */
29270 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29271 return true;
29272
29273 switch (code)
29274 {
29275 /* Post increment and Pre Decrement are supported for all
29276 instruction forms except for vector forms. */
29277 case ARM_POST_INC:
29278 case ARM_PRE_DEC:
29279 if (VECTOR_MODE_P (mode))
29280 {
29281 if (code != ARM_PRE_DEC)
29282 return true;
29283 else
29284 return false;
29285 }
29286
29287 return true;
29288
29289 case ARM_POST_DEC:
29290 case ARM_PRE_INC:
29291 /* Without LDRD and mode size greater than
29292 word size, there is no point in auto-incrementing
29293 because ldm and stm will not have these forms. */
29294 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29295 return false;
29296
29297 /* Vector and floating point modes do not support
29298 these auto increment forms. */
29299 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29300 return false;
29301
29302 return true;
29303
29304 default:
29305 return false;
29306
29307 }
29308
29309 return false;
29310 }
29311
29312 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29313 on ARM, since we know that shifts by negative amounts are no-ops.
29314 Additionally, the default expansion code is not available or suitable
29315 for post-reload insn splits (this can occur when the register allocator
29316 chooses not to do a shift in NEON).
29317
29318 This function is used in both initial expand and post-reload splits, and
29319 handles all kinds of 64-bit shifts.
29320
29321 Input requirements:
29322 - It is safe for the input and output to be the same register, but
29323 early-clobber rules apply for the shift amount and scratch registers.
29324 - Shift by register requires both scratch registers. In all other cases
29325 the scratch registers may be NULL.
29326 - Ashiftrt by a register also clobbers the CC register. */
29327 void
29328 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29329 rtx amount, rtx scratch1, rtx scratch2)
29330 {
29331 rtx out_high = gen_highpart (SImode, out);
29332 rtx out_low = gen_lowpart (SImode, out);
29333 rtx in_high = gen_highpart (SImode, in);
29334 rtx in_low = gen_lowpart (SImode, in);
29335
29336 /* Terminology:
29337 in = the register pair containing the input value.
29338 out = the destination register pair.
29339 up = the high- or low-part of each pair.
29340 down = the opposite part to "up".
29341 In a shift, we can consider bits to shift from "up"-stream to
29342 "down"-stream, so in a left-shift "up" is the low-part and "down"
29343 is the high-part of each register pair. */
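/* For instance (a sketch of the shift-by-constant case below), a DImode
   left shift by 5 comes out as:
	out_high = in_high << 5;
	out_high |= (unsigned) in_low >> 27;
	out_low = in_low << 5; */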
29344
29345 rtx out_up = code == ASHIFT ? out_low : out_high;
29346 rtx out_down = code == ASHIFT ? out_high : out_low;
29347 rtx in_up = code == ASHIFT ? in_low : in_high;
29348 rtx in_down = code == ASHIFT ? in_high : in_low;
29349
29350 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29351 gcc_assert (out
29352 && (REG_P (out) || GET_CODE (out) == SUBREG)
29353 && GET_MODE (out) == DImode);
29354 gcc_assert (in
29355 && (REG_P (in) || GET_CODE (in) == SUBREG)
29356 && GET_MODE (in) == DImode);
29357 gcc_assert (amount
29358 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29359 && GET_MODE (amount) == SImode)
29360 || CONST_INT_P (amount)));
29361 gcc_assert (scratch1 == NULL
29362 || (GET_CODE (scratch1) == SCRATCH)
29363 || (GET_MODE (scratch1) == SImode
29364 && REG_P (scratch1)));
29365 gcc_assert (scratch2 == NULL
29366 || (GET_CODE (scratch2) == SCRATCH)
29367 || (GET_MODE (scratch2) == SImode
29368 && REG_P (scratch2)));
29369 gcc_assert (!REG_P (out) || !REG_P (amount)
29370 || !HARD_REGISTER_P (out)
29371 || (REGNO (out) != REGNO (amount)
29372 && REGNO (out) + 1 != REGNO (amount)));
29373
29374 /* Macros to make following code more readable. */
29375 #define SUB_32(DEST,SRC) \
29376 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29377 #define RSB_32(DEST,SRC) \
29378 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29379 #define SUB_S_32(DEST,SRC) \
29380 gen_addsi3_compare0 ((DEST), (SRC), \
29381 GEN_INT (-32))
29382 #define SET(DEST,SRC) \
29383 gen_rtx_SET ((DEST), (SRC))
29384 #define SHIFT(CODE,SRC,AMOUNT) \
29385 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29386 #define LSHIFT(CODE,SRC,AMOUNT) \
29387 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29388 SImode, (SRC), (AMOUNT))
29389 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29390 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29391 SImode, (SRC), (AMOUNT))
29392 #define ORR(A,B) \
29393 gen_rtx_IOR (SImode, (A), (B))
29394 #define BRANCH(COND,LABEL) \
29395 gen_arm_cond_branch ((LABEL), \
29396 gen_rtx_ ## COND (CCmode, cc_reg, \
29397 const0_rtx), \
29398 cc_reg)
29399
29400 /* Shifts by register and shifts by constant are handled separately. */
29401 if (CONST_INT_P (amount))
29402 {
29403 /* We have a shift-by-constant. */
29404
29405 /* First, handle out-of-range shift amounts.
29406 In both cases we try to match the result that an ARM instruction in a
29407 shift-by-register would give. This helps reduce execution
29408 differences between optimization levels, but it won't stop other
29409 parts of the compiler doing different things. This is "undefined
29410 behavior", in any case. */
29411 if (INTVAL (amount) <= 0)
29412 emit_insn (gen_movdi (out, in));
29413 else if (INTVAL (amount) >= 64)
29414 {
29415 if (code == ASHIFTRT)
29416 {
29417 rtx const31_rtx = GEN_INT (31);
29418 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29419 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29420 }
29421 else
29422 emit_insn (gen_movdi (out, const0_rtx));
29423 }
29424
29425 /* Now handle valid shifts. */
29426 else if (INTVAL (amount) < 32)
29427 {
29428 /* Shifts by a constant less than 32. */
29429 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29430
29431 /* Clearing the out register in DImode first avoids lots
29432 of spilling and results in less stack usage.
29433 Later this redundant insn is completely removed.
29434 Do that only if "in" and "out" are different registers. */
29435 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29436 emit_insn (SET (out, const0_rtx));
29437 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29438 emit_insn (SET (out_down,
29439 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29440 out_down)));
29441 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29442 }
29443 else
29444 {
29445 /* Shifts by a constant greater than 31. */
29446 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29447
29448 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29449 emit_insn (SET (out, const0_rtx));
29450 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29451 if (code == ASHIFTRT)
29452 emit_insn (gen_ashrsi3 (out_up, in_up,
29453 GEN_INT (31)));
29454 else
29455 emit_insn (SET (out_up, const0_rtx));
29456 }
29457 }
29458 else
29459 {
29460 /* We have a shift-by-register. */
29461 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29462
29463 /* This alternative requires the scratch registers. */
29464 gcc_assert (scratch1 && REG_P (scratch1));
29465 gcc_assert (scratch2 && REG_P (scratch2));
29466
29467 /* We will need the values "amount-32" and "32-amount" later.
29468 Swapping them around now allows the later code to be more general. */
29469 switch (code)
29470 {
29471 case ASHIFT:
29472 emit_insn (SUB_32 (scratch1, amount));
29473 emit_insn (RSB_32 (scratch2, amount));
29474 break;
29475 case ASHIFTRT:
29476 emit_insn (RSB_32 (scratch1, amount));
29477 /* Also set CC = amount > 32. */
29478 emit_insn (SUB_S_32 (scratch2, amount));
29479 break;
29480 case LSHIFTRT:
29481 emit_insn (RSB_32 (scratch1, amount));
29482 emit_insn (SUB_32 (scratch2, amount));
29483 break;
29484 default:
29485 gcc_unreachable ();
29486 }
29487
29488 /* Emit code like this:
29489
29490 arithmetic-left:
29491 out_down = in_down << amount;
29492 out_down = (in_up << (amount - 32)) | out_down;
29493 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29494 out_up = in_up << amount;
29495
29496 arithmetic-right:
29497 out_down = in_down >> amount;
29498 out_down = (in_up << (32 - amount)) | out_down;
29499 if (amount < 32)
29500 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29501 out_up = in_up << amount;
29502
29503 logical-right:
29504 out_down = in_down >> amount;
29505 out_down = (in_up << (32 - amount)) | out_down;
29506 if (amount < 32)
29507 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29508 out_up = in_up << amount;
29509
29510 The ARM and Thumb2 variants are the same but implemented slightly
29511 differently. If this were only called during expand we could just
29512 use the Thumb2 case and let combine do the right thing, but this
29513 can also be called from post-reload splitters. */
29514
29515 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29516
29517 if (!TARGET_THUMB2)
29518 {
29519 /* Emit code for ARM mode. */
29520 emit_insn (SET (out_down,
29521 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29522 if (code == ASHIFTRT)
29523 {
29524 rtx_code_label *done_label = gen_label_rtx ();
29525 emit_jump_insn (BRANCH (LT, done_label));
29526 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29527 out_down)));
29528 emit_label (done_label);
29529 }
29530 else
29531 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29532 out_down)));
29533 }
29534 else
29535 {
29536 /* Emit code for Thumb2 mode.
29537 Thumb2 can't do shift and or in one insn. */
29538 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29539 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29540
29541 if (code == ASHIFTRT)
29542 {
29543 rtx_code_label *done_label = gen_label_rtx ();
29544 emit_jump_insn (BRANCH (LT, done_label));
29545 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29546 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29547 emit_label (done_label);
29548 }
29549 else
29550 {
29551 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29552 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29553 }
29554 }
29555
29556 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29557 }
29558
29559 #undef SUB_32
29560 #undef RSB_32
29561 #undef SUB_S_32
29562 #undef SET
29563 #undef SHIFT
29564 #undef LSHIFT
29565 #undef REV_LSHIFT
29566 #undef ORR
29567 #undef BRANCH
29568 }
29569
29570 /* Returns true if the pattern is a valid symbolic address, which is either a
29571 symbol_ref or (symbol_ref + addend).
29572
29573 According to the ARM ELF ABI, the initial addend of REL-type relocations
29574 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29575 literal field of the instruction as a 16-bit signed value in the range
29576 -32768 <= A < 32768. */
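/* For example, both (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 12))) are accepted, whereas
   an addend of 0x10000 falls outside that range and is rejected. */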
29577
29578 bool
29579 arm_valid_symbolic_address_p (rtx addr)
29580 {
29581 rtx xop0, xop1 = NULL_RTX;
29582 rtx tmp = addr;
29583
29584 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29585 return true;
29586
29587 /* (const (plus: symbol_ref const_int)) */
29588 if (GET_CODE (addr) == CONST)
29589 tmp = XEXP (addr, 0);
29590
29591 if (GET_CODE (tmp) == PLUS)
29592 {
29593 xop0 = XEXP (tmp, 0);
29594 xop1 = XEXP (tmp, 1);
29595
29596 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29597 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29598 }
29599
29600 return false;
29601 }
29602
29603 /* Returns true if this is a valid comparison operation, and puts
29604 the operands into a form that is valid. */
29605 bool
29606 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29607 {
29608 enum rtx_code code = GET_CODE (*comparison);
29609 int code_int;
29610 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29611 ? GET_MODE (*op2) : GET_MODE (*op1);
29612
29613 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29614
29615 if (code == UNEQ || code == LTGT)
29616 return false;
29617
29618 code_int = (int)code;
29619 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29620 PUT_CODE (*comparison, (enum rtx_code)code_int);
29621
29622 switch (mode)
29623 {
29624 case E_SImode:
29625 if (!arm_add_operand (*op1, mode))
29626 *op1 = force_reg (mode, *op1);
29627 if (!arm_add_operand (*op2, mode))
29628 *op2 = force_reg (mode, *op2);
29629 return true;
29630
29631 case E_DImode:
29632 if (!cmpdi_operand (*op1, mode))
29633 *op1 = force_reg (mode, *op1);
29634 if (!cmpdi_operand (*op2, mode))
29635 *op2 = force_reg (mode, *op2);
29636 return true;
29637
29638 case E_HFmode:
29639 if (!TARGET_VFP_FP16INST)
29640 break;
29641 /* FP16 comparisons are done in SF mode. */
29642 mode = SFmode;
29643 *op1 = convert_to_mode (mode, *op1, 1);
29644 *op2 = convert_to_mode (mode, *op2, 1);
29645 /* Fall through. */
29646 case E_SFmode:
29647 case E_DFmode:
29648 if (!vfp_compare_operand (*op1, mode))
29649 *op1 = force_reg (mode, *op1);
29650 if (!vfp_compare_operand (*op2, mode))
29651 *op2 = force_reg (mode, *op2);
29652 return true;
29653 default:
29654 break;
29655 }
29656
29657 return false;
29658
29659 }
29660
29661 /* Maximum number of instructions to set block of memory. */
29662 static int
29663 arm_block_set_max_insns (void)
29664 {
29665 if (optimize_function_for_size_p (cfun))
29666 return 4;
29667 else
29668 return current_tune->max_insns_inline_memset;
29669 }
29670
29671 /* Return TRUE if it's profitable to set block of memory for
29672 non-vectorized case. VAL is the value to set the memory
29673 with. LENGTH is the number of bytes to set. ALIGN is the
29674 alignment of the destination memory in bytes. UNALIGNED_P
29675 is TRUE if we can only set the memory with instructions
29676 meeting alignment requirements. USE_STRD_P is TRUE if we
29677 can use strd to set the memory. */
29678 static bool
29679 arm_block_set_non_vect_profit_p (rtx val,
29680 unsigned HOST_WIDE_INT length,
29681 unsigned HOST_WIDE_INT align,
29682 bool unaligned_p, bool use_strd_p)
29683 {
29684 int num = 0;
29685 /* For a leftover of 0-7 bytes, we can set the memory block using
29686 strb/strh/str with the minimum number of instructions. */
29687 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
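/* For example, leftover[7] == 3 because 7 trailing bytes take one str,
   one strh and one strb; with USE_STRD_P a length of 11 costs
   (11 >> 3) + leftover[3] == 1 + 2 stores on top of the constant cost. */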
29688
29689 if (unaligned_p)
29690 {
29691 num = arm_const_inline_cost (SET, val);
29692 num += length / align + length % align;
29693 }
29694 else if (use_strd_p)
29695 {
29696 num = arm_const_double_inline_cost (val);
29697 num += (length >> 3) + leftover[length & 7];
29698 }
29699 else
29700 {
29701 num = arm_const_inline_cost (SET, val);
29702 num += (length >> 2) + leftover[length & 3];
29703 }
29704
29705 /* We may be able to combine last pair STRH/STRB into a single STR
29706 by shifting one byte back. */
29707 if (unaligned_access && length > 3 && (length & 3) == 3)
29708 num--;
29709
29710 return (num <= arm_block_set_max_insns ());
29711 }
29712
29713 /* Return TRUE if it's profitable to set block of memory for
29714 vectorized case. LENGTH is the number of bytes to set.
29715 ALIGN is the alignment of destination memory in bytes.
29716 MODE is the vector mode used to set the memory. */
29717 static bool
29718 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29719 unsigned HOST_WIDE_INT align,
29720 machine_mode mode)
29721 {
29722 int num;
29723 bool unaligned_p = ((align & 3) != 0);
29724 unsigned int nelt = GET_MODE_NUNITS (mode);
29725
29726 /* Instruction loading constant value. */
29727 num = 1;
29728 /* Instructions storing the memory. */
29729 num += (length + nelt - 1) / nelt;
29730 /* Instructions adjusting the address expression. We only need to
29731 adjust the address expression if it's 4-byte aligned and the
29732 leftover bytes can only be stored by a misaligned store instruction. */
29733 if (!unaligned_p && (length & 3) != 0)
29734 num++;
29735
29736 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29737 if (!unaligned_p && mode == V16QImode)
29738 num--;
29739
29740 return (num <= arm_block_set_max_insns ());
29741 }
29742
29743 /* Set a block of memory using vectorization instructions for the
29744 unaligned case. We fill the first LENGTH bytes of the memory
29745 area starting from DSTBASE with byte constant VALUE. ALIGN is
29746 the alignment requirement of memory. Return TRUE if succeeded. */
29747 static bool
29748 arm_block_set_unaligned_vect (rtx dstbase,
29749 unsigned HOST_WIDE_INT length,
29750 unsigned HOST_WIDE_INT value,
29751 unsigned HOST_WIDE_INT align)
29752 {
29753 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29754 rtx dst, mem;
29755 rtx val_elt, val_vec, reg;
29756 rtx rval[MAX_VECT_LEN];
29757 rtx (*gen_func) (rtx, rtx);
29758 machine_mode mode;
29759 unsigned HOST_WIDE_INT v = value;
29760 unsigned int offset = 0;
29761 gcc_assert ((align & 0x3) != 0);
29762 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29763 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29764 if (length >= nelt_v16)
29765 {
29766 mode = V16QImode;
29767 gen_func = gen_movmisalignv16qi;
29768 }
29769 else
29770 {
29771 mode = V8QImode;
29772 gen_func = gen_movmisalignv8qi;
29773 }
29774 nelt_mode = GET_MODE_NUNITS (mode);
29775 gcc_assert (length >= nelt_mode);
29776 /* Skip if it isn't profitable. */
29777 if (!arm_block_set_vect_profit_p (length, align, mode))
29778 return false;
29779
29780 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29781 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29782
29783 v = sext_hwi (v, BITS_PER_WORD);
29784 val_elt = GEN_INT (v);
29785 for (j = 0; j < nelt_mode; j++)
29786 rval[j] = val_elt;
29787
29788 reg = gen_reg_rtx (mode);
29789 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29790 /* Emit instruction loading the constant value. */
29791 emit_move_insn (reg, val_vec);
29792
29793 /* Handle nelt_mode bytes in a vector. */
29794 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29795 {
29796 emit_insn ((*gen_func) (mem, reg));
29797 if (i + 2 * nelt_mode <= length)
29798 {
29799 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29800 offset += nelt_mode;
29801 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29802 }
29803 }
29804
29805 /* If there are at least nelt_v8 bytes leftover, we must be in
29806 V16QImode. */
29807 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29808
29809 /* Handle (8, 16) bytes leftover. */
29810 if (i + nelt_v8 < length)
29811 {
29812 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29813 offset += length - i;
29814 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29815
29816 /* We are shifting bytes back, set the alignment accordingly. */
29817 if ((length & 1) != 0 && align >= 2)
29818 set_mem_align (mem, BITS_PER_UNIT);
29819
29820 emit_insn (gen_movmisalignv16qi (mem, reg));
29821 }
29822 /* Handle (0, 8] bytes leftover. */
29823 else if (i < length && i + nelt_v8 >= length)
29824 {
29825 if (mode == V16QImode)
29826 reg = gen_lowpart (V8QImode, reg);
29827
29828 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29829 + (nelt_mode - nelt_v8))));
29830 offset += (length - i) + (nelt_mode - nelt_v8);
29831 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29832
29833 /* We are shifting bytes back, set the alignment accordingly. */
29834 if ((length & 1) != 0 && align >= 2)
29835 set_mem_align (mem, BITS_PER_UNIT);
29836
29837 emit_insn (gen_movmisalignv8qi (mem, reg));
29838 }
29839
29840 return true;
29841 }
29842
29843 /* Set a block of memory using vectorization instructions for the
29844 aligned case. We fill the first LENGTH bytes of the memory area
29845 starting from DSTBASE with byte constant VALUE. ALIGN is the
29846 alignment requirement of memory. Return TRUE if succeeded. */
29847 static bool
29848 arm_block_set_aligned_vect (rtx dstbase,
29849 unsigned HOST_WIDE_INT length,
29850 unsigned HOST_WIDE_INT value,
29851 unsigned HOST_WIDE_INT align)
29852 {
29853 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29854 rtx dst, addr, mem;
29855 rtx val_elt, val_vec, reg;
29856 rtx rval[MAX_VECT_LEN];
29857 machine_mode mode;
29858 unsigned HOST_WIDE_INT v = value;
29859 unsigned int offset = 0;
29860
29861 gcc_assert ((align & 0x3) == 0);
29862 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29863 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29864 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29865 mode = V16QImode;
29866 else
29867 mode = V8QImode;
29868
29869 nelt_mode = GET_MODE_NUNITS (mode);
29870 gcc_assert (length >= nelt_mode);
29871 /* Skip if it isn't profitable. */
29872 if (!arm_block_set_vect_profit_p (length, align, mode))
29873 return false;
29874
29875 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29876
29877 v = sext_hwi (v, BITS_PER_WORD);
29878 val_elt = GEN_INT (v);
29879 for (j = 0; j < nelt_mode; j++)
29880 rval[j] = val_elt;
29881
29882 reg = gen_reg_rtx (mode);
29883 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29884 /* Emit instruction loading the constant value. */
29885 emit_move_insn (reg, val_vec);
29886
29887 i = 0;
29888 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29889 if (mode == V16QImode)
29890 {
29891 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29892 emit_insn (gen_movmisalignv16qi (mem, reg));
29893 i += nelt_mode;
29894 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29895 if (i + nelt_v8 < length && i + nelt_v16 > length)
29896 {
29897 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29898 offset += length - nelt_mode;
29899 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29900 /* We are shifting bytes back, set the alignment accordingly. */
29901 if ((length & 0x3) == 0)
29902 set_mem_align (mem, BITS_PER_UNIT * 4);
29903 else if ((length & 0x1) == 0)
29904 set_mem_align (mem, BITS_PER_UNIT * 2);
29905 else
29906 set_mem_align (mem, BITS_PER_UNIT);
29907
29908 emit_insn (gen_movmisalignv16qi (mem, reg));
29909 return true;
29910 }
29911 /* Fall through for bytes leftover. */
29912 mode = V8QImode;
29913 nelt_mode = GET_MODE_NUNITS (mode);
29914 reg = gen_lowpart (V8QImode, reg);
29915 }
29916
29917 /* Handle 8 bytes in a vector. */
29918 for (; (i + nelt_mode <= length); i += nelt_mode)
29919 {
29920 addr = plus_constant (Pmode, dst, i);
29921 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29922 emit_move_insn (mem, reg);
29923 }
29924
29925 /* Handle single word leftover by shifting 4 bytes back. We can
29926 use aligned access for this case. */
29927 if (i + UNITS_PER_WORD == length)
29928 {
29929 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29930 offset += i - UNITS_PER_WORD;
29931 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29932 /* We are shifting 4 bytes back, set the alignment accordingly. */
29933 if (align > UNITS_PER_WORD)
29934 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29935
29936 emit_move_insn (mem, reg);
29937 }
29938 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29939 We have to use unaligned access for this case. */
29940 else if (i < length)
29941 {
29942 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29943 offset += length - nelt_mode;
29944 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29945 /* We are shifting bytes back, set the alignment accordingly. */
29946 if ((length & 1) == 0)
29947 set_mem_align (mem, BITS_PER_UNIT * 2);
29948 else
29949 set_mem_align (mem, BITS_PER_UNIT);
29950
29951 emit_insn (gen_movmisalignv8qi (mem, reg));
29952 }
29953
29954 return true;
29955 }
29956
29957 /* Set a block of memory using plain strh/strb instructions, only
29958 using instructions allowed by ALIGN on the processor. We fill the
29959 first LENGTH bytes of the memory area starting from DSTBASE
29960 with byte constant VALUE. ALIGN is the alignment requirement
29961 of memory. */
29962 static bool
29963 arm_block_set_unaligned_non_vect (rtx dstbase,
29964 unsigned HOST_WIDE_INT length,
29965 unsigned HOST_WIDE_INT value,
29966 unsigned HOST_WIDE_INT align)
29967 {
29968 unsigned int i;
29969 rtx dst, addr, mem;
29970 rtx val_exp, val_reg, reg;
29971 machine_mode mode;
29972 HOST_WIDE_INT v = value;
29973
29974 gcc_assert (align == 1 || align == 2);
29975
29976 if (align == 2)
29977 v |= (value << BITS_PER_UNIT);
29978
29979 v = sext_hwi (v, BITS_PER_WORD);
29980 val_exp = GEN_INT (v);
29981 /* Skip if it isn't profitable. */
29982 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29983 align, true, false))
29984 return false;
29985
29986 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29987 mode = (align == 2 ? HImode : QImode);
29988 val_reg = force_reg (SImode, val_exp);
29989 reg = gen_lowpart (mode, val_reg);
29990
29991 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29992 {
29993 addr = plus_constant (Pmode, dst, i);
29994 mem = adjust_automodify_address (dstbase, mode, addr, i);
29995 emit_move_insn (mem, reg);
29996 }
29997
29998 /* Handle single byte leftover. */
29999 if (i + 1 == length)
30000 {
30001 reg = gen_lowpart (QImode, val_reg);
30002 addr = plus_constant (Pmode, dst, i);
30003 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30004 emit_move_insn (mem, reg);
30005 i++;
30006 }
30007
30008 gcc_assert (i == length);
30009 return true;
30010 }
30011
30012 /* Set a block of memory using plain strd/str/strh/strb instructions,
30013 to permit unaligned copies on processors which support unaligned
30014 semantics for those instructions. We fill the first LENGTH bytes
30015 of the memory area starting from DSTBASE with byte constant VALUE.
30016 ALIGN is the alignment requirement of memory. */
30017 static bool
30018 arm_block_set_aligned_non_vect (rtx dstbase,
30019 unsigned HOST_WIDE_INT length,
30020 unsigned HOST_WIDE_INT value,
30021 unsigned HOST_WIDE_INT align)
30022 {
30023 unsigned int i;
30024 rtx dst, addr, mem;
30025 rtx val_exp, val_reg, reg;
30026 unsigned HOST_WIDE_INT v;
30027 bool use_strd_p;
30028
30029 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30030 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30031
30032 v = (value | (value << 8) | (value << 16) | (value << 24));
30033 if (length < UNITS_PER_WORD)
30034 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30035
30036 if (use_strd_p)
30037 v |= (v << BITS_PER_WORD);
30038 else
30039 v = sext_hwi (v, BITS_PER_WORD);
30040
30041 val_exp = GEN_INT (v);
30042 /* Skip if it isn't profitable. */
30043 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30044 align, false, use_strd_p))
30045 {
30046 if (!use_strd_p)
30047 return false;
30048
30049 /* Try without strd. */
30050 v = (v >> BITS_PER_WORD);
30051 v = sext_hwi (v, BITS_PER_WORD);
30052 val_exp = GEN_INT (v);
30053 use_strd_p = false;
30054 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30055 align, false, use_strd_p))
30056 return false;
30057 }
30058
30059 i = 0;
30060 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30061 /* Handle double words using strd if possible. */
30062 if (use_strd_p)
30063 {
30064 val_reg = force_reg (DImode, val_exp);
30065 reg = val_reg;
30066 for (; (i + 8 <= length); i += 8)
30067 {
30068 addr = plus_constant (Pmode, dst, i);
30069 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30070 emit_move_insn (mem, reg);
30071 }
30072 }
30073 else
30074 val_reg = force_reg (SImode, val_exp);
30075
30076 /* Handle words. */
30077 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30078 for (; (i + 4 <= length); i += 4)
30079 {
30080 addr = plus_constant (Pmode, dst, i);
30081 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30082 if ((align & 3) == 0)
30083 emit_move_insn (mem, reg);
30084 else
30085 emit_insn (gen_unaligned_storesi (mem, reg));
30086 }
30087
30088 /* Merge last pair of STRH and STRB into a STR if possible. */
30089 if (unaligned_access && i > 0 && (i + 3) == length)
30090 {
30091 addr = plus_constant (Pmode, dst, i - 1);
30092 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30093 /* We are shifting one byte back, set the alignment accordingly. */
30094 if ((align & 1) == 0)
30095 set_mem_align (mem, BITS_PER_UNIT);
30096
30097 /* Most likely this is an unaligned access, and we can't tell at
30098 compilation time. */
30099 emit_insn (gen_unaligned_storesi (mem, reg));
30100 return true;
30101 }
30102
30103 /* Handle half word leftover. */
30104 if (i + 2 <= length)
30105 {
30106 reg = gen_lowpart (HImode, val_reg);
30107 addr = plus_constant (Pmode, dst, i);
30108 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30109 if ((align & 1) == 0)
30110 emit_move_insn (mem, reg);
30111 else
30112 emit_insn (gen_unaligned_storehi (mem, reg));
30113
30114 i += 2;
30115 }
30116
30117 /* Handle single byte leftover. */
30118 if (i + 1 == length)
30119 {
30120 reg = gen_lowpart (QImode, val_reg);
30121 addr = plus_constant (Pmode, dst, i);
30122 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30123 emit_move_insn (mem, reg);
30124 }
30125
30126 return true;
30127 }
30128
30129 /* Set a block of memory using vectorization instructions for both
30130 aligned and unaligned cases. We fill the first LENGTH bytes of
30131 the memory area starting from DSTBASE with byte constant VALUE.
30132 ALIGN is the alignment requirement of memory. */
30133 static bool
30134 arm_block_set_vect (rtx dstbase,
30135 unsigned HOST_WIDE_INT length,
30136 unsigned HOST_WIDE_INT value,
30137 unsigned HOST_WIDE_INT align)
30138 {
30139 /* Check whether we need to use unaligned store instruction. */
30140 if (((align & 3) != 0 || (length & 3) != 0)
30141 /* Check whether unaligned store instruction is available. */
30142 && (!unaligned_access || BYTES_BIG_ENDIAN))
30143 return false;
30144
30145 if ((align & 3) == 0)
30146 return arm_block_set_aligned_vect (dstbase, length, value, align);
30147 else
30148 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30149 }
30150
30151 /* Expand string store operation. First we try to do that by using
30152 vectorization instructions, then try with ARM unaligned access and
30153 double-word store if profitable. OPERANDS[0] is the destination,
30154 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30155 initialize the memory, OPERANDS[3] is the known alignment of the
30156 destination. */
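/* For instance, expanding a word-aligned memset (p, 7, 15) would typically
   arrive here with OPERANDS[1] == 15, OPERANDS[2] == 7 and
   OPERANDS[3] == 4. */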
30157 bool
30158 arm_gen_setmem (rtx *operands)
30159 {
30160 rtx dstbase = operands[0];
30161 unsigned HOST_WIDE_INT length;
30162 unsigned HOST_WIDE_INT value;
30163 unsigned HOST_WIDE_INT align;
30164
30165 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30166 return false;
30167
30168 length = UINTVAL (operands[1]);
30169 if (length > 64)
30170 return false;
30171
30172 value = (UINTVAL (operands[2]) & 0xFF);
30173 align = UINTVAL (operands[3]);
30174 if (TARGET_NEON && length >= 8
30175 && current_tune->string_ops_prefer_neon
30176 && arm_block_set_vect (dstbase, length, value, align))
30177 return true;
30178
30179 if (!unaligned_access && (align & 3) != 0)
30180 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30181
30182 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30183 }
30184
30185
30186 static bool
30187 arm_macro_fusion_p (void)
30188 {
30189 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30190 }
30191
30192 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30193 for MOVW / MOVT macro fusion. */
30194
30195 static bool
30196 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30197 {
30198 /* We are trying to fuse
30199 movw imm / movt imm
30200 instructions as a group that gets scheduled together. */
30201
30202 rtx set_dest = SET_DEST (curr_set);
30203
30204 if (GET_MODE (set_dest) != SImode)
30205 return false;
30206
30207 /* We are trying to match:
30208 prev (movw) == (set (reg r0) (const_int imm16))
30209 curr (movt) == (set (zero_extract (reg r0)
30210 (const_int 16)
30211 (const_int 16))
30212 (const_int imm16_1))
30213 or
30214 prev (movw) == (set (reg r1)
30215 (high (symbol_ref ("SYM"))))
30216 curr (movt) == (set (reg r0)
30217 (lo_sum (reg r1)
30218 (symbol_ref ("SYM")))) */
30219
30220 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30221 {
30222 if (CONST_INT_P (SET_SRC (curr_set))
30223 && CONST_INT_P (SET_SRC (prev_set))
30224 && REG_P (XEXP (set_dest, 0))
30225 && REG_P (SET_DEST (prev_set))
30226 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30227 return true;
30228
30229 }
30230 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30231 && REG_P (SET_DEST (curr_set))
30232 && REG_P (SET_DEST (prev_set))
30233 && GET_CODE (SET_SRC (prev_set)) == HIGH
30234 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30235 return true;
30236
30237 return false;
30238 }
30239
30240 static bool
30241 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30242 {
30243 rtx prev_set = single_set (prev);
30244 rtx curr_set = single_set (curr);
30245
30246 if (!prev_set
30247 || !curr_set)
30248 return false;
30249
30250 if (any_condjump_p (curr))
30251 return false;
30252
30253 if (!arm_macro_fusion_p ())
30254 return false;
30255
30256 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30257 && aarch_crypto_can_dual_issue (prev, curr))
30258 return true;
30259
30260 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30261 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30262 return true;
30263
30264 return false;
30265 }
30266
30267 /* Return true iff the instruction fusion described by OP is enabled. */
30268 bool
30269 arm_fusion_enabled_p (tune_params::fuse_ops op)
30270 {
30271 return current_tune->fusible_ops & op;
30272 }
30273
30274 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30275 scheduled for speculative execution. Reject the long-running division
30276 and square-root instructions. */
30277
30278 static bool
30279 arm_sched_can_speculate_insn (rtx_insn *insn)
30280 {
30281 switch (get_attr_type (insn))
30282 {
30283 case TYPE_SDIV:
30284 case TYPE_UDIV:
30285 case TYPE_FDIVS:
30286 case TYPE_FDIVD:
30287 case TYPE_FSQRTS:
30288 case TYPE_FSQRTD:
30289 case TYPE_NEON_FP_SQRT_S:
30290 case TYPE_NEON_FP_SQRT_D:
30291 case TYPE_NEON_FP_SQRT_S_Q:
30292 case TYPE_NEON_FP_SQRT_D_Q:
30293 case TYPE_NEON_FP_DIV_S:
30294 case TYPE_NEON_FP_DIV_D:
30295 case TYPE_NEON_FP_DIV_S_Q:
30296 case TYPE_NEON_FP_DIV_D_Q:
30297 return false;
30298 default:
30299 return true;
30300 }
30301 }
30302
30303 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30304
30305 static unsigned HOST_WIDE_INT
30306 arm_asan_shadow_offset (void)
30307 {
30308 return HOST_WIDE_INT_1U << 29;
30309 }
30310
30311
30312 /* This is a temporary fix for PR60655. Ideally we need
30313 to handle most of these cases in the generic part but
30314 currently we reject minus (..) (sym_ref). We try to
30315 ameliorate the case with minus (sym_ref1) (sym_ref2)
30316 where they are in the same section. */
30317
30318 static bool
30319 arm_const_not_ok_for_debug_p (rtx p)
30320 {
30321 tree decl_op0 = NULL;
30322 tree decl_op1 = NULL;
30323
30324 if (GET_CODE (p) == MINUS)
30325 {
30326 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30327 {
30328 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30329 if (decl_op1
30330 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30331 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30332 {
30333 if ((VAR_P (decl_op1)
30334 || TREE_CODE (decl_op1) == CONST_DECL)
30335 && (VAR_P (decl_op0)
30336 || TREE_CODE (decl_op0) == CONST_DECL))
30337 return (get_variable_section (decl_op1, false)
30338 != get_variable_section (decl_op0, false));
30339
30340 if (TREE_CODE (decl_op1) == LABEL_DECL
30341 && TREE_CODE (decl_op0) == LABEL_DECL)
30342 return (DECL_CONTEXT (decl_op1)
30343 != DECL_CONTEXT (decl_op0));
30344 }
30345
30346 return true;
30347 }
30348 }
30349
30350 return false;
30351 }
30352
30353 /* Return TRUE if X is a reference to a value in a constant pool. */
30354 extern bool
30355 arm_is_constant_pool_ref (rtx x)
30356 {
30357 return (MEM_P (x)
30358 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30359 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30360 }
30361
30362 /* Remember the last target of arm_set_current_function. */
30363 static GTY(()) tree arm_previous_fndecl;
30364
30365 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30366
30367 void
30368 save_restore_target_globals (tree new_tree)
30369 {
30370 /* If we have a previous state, use it. */
30371 if (TREE_TARGET_GLOBALS (new_tree))
30372 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30373 else if (new_tree == target_option_default_node)
30374 restore_target_globals (&default_target_globals);
30375 else
30376 {
30377 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30378 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30379 }
30380
30381 arm_option_params_internal ();
30382 }
30383
30384 /* Invalidate arm_previous_fndecl. */
30385
30386 void
30387 arm_reset_previous_fndecl (void)
30388 {
30389 arm_previous_fndecl = NULL_TREE;
30390 }
30391
30392 /* Establish appropriate back-end context for processing the function
30393 FNDECL. The argument might be NULL to indicate processing at top
30394 level, outside of any function scope. */
30395
30396 static void
30397 arm_set_current_function (tree fndecl)
30398 {
30399 if (!fndecl || fndecl == arm_previous_fndecl)
30400 return;
30401
30402 tree old_tree = (arm_previous_fndecl
30403 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30404 : NULL_TREE);
30405
30406 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30407
30408 /* If the current function has no attributes but the previous one did,
30409 use the default node. */
30410 if (! new_tree && old_tree)
30411 new_tree = target_option_default_node;
30412
30413 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30414 the default have been handled by save_restore_target_globals from
30415 arm_pragma_target_parse. */
30416 if (old_tree == new_tree)
30417 return;
30418
30419 arm_previous_fndecl = fndecl;
30420
30421 /* First set the target options. */
30422 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30423
30424 save_restore_target_globals (new_tree);
30425 }
30426
30427 /* Implement TARGET_OPTION_PRINT. */
30428
30429 static void
30430 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30431 {
30432 int flags = ptr->x_target_flags;
30433 const char *fpu_name;
30434
30435 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30436 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30437
30438 fprintf (file, "%*sselected isa %s\n", indent, "",
30439 TARGET_THUMB2_P (flags) ? "thumb2" :
30440 TARGET_THUMB_P (flags) ? "thumb1" :
30441 "arm");
30442
30443 if (ptr->x_arm_arch_string)
30444 fprintf (file, "%*sselected architecture %s\n", indent, "",
30445 ptr->x_arm_arch_string);
30446
30447 if (ptr->x_arm_cpu_string)
30448 fprintf (file, "%*sselected CPU %s\n", indent, "",
30449 ptr->x_arm_cpu_string);
30450
30451 if (ptr->x_arm_tune_string)
30452 fprintf (file, "%*sselected tune %s\n", indent, "",
30453 ptr->x_arm_tune_string);
30454
30455 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30456 }
30457
30458 /* Hook to determine if one function can safely inline another. */
30459
30460 static bool
30461 arm_can_inline_p (tree caller, tree callee)
30462 {
30463 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30464 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30465 bool can_inline = true;
30466
30467 struct cl_target_option *caller_opts
30468 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30469 : target_option_default_node);
30470
30471 struct cl_target_option *callee_opts
30472 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30473 : target_option_default_node);
30474
30475 if (callee_opts == caller_opts)
30476 return true;
30477
30478 /* Callee's ISA features should be a subset of the caller's. */
30479 struct arm_build_target caller_target;
30480 struct arm_build_target callee_target;
30481 caller_target.isa = sbitmap_alloc (isa_num_bits);
30482 callee_target.isa = sbitmap_alloc (isa_num_bits);
30483
30484 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30485 false);
30486 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30487 false);
30488 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30489 can_inline = false;
30490
30491 sbitmap_free (caller_target.isa);
30492 sbitmap_free (callee_target.isa);
30493
30494 /* OK to inline between different modes.
30495 A function with mode-specific instructions, e.g. using asm,
30496 must be explicitly protected with noinline. */
30497 return can_inline;
30498 }
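/* For instance, under the subset rule above a callee whose target needs no
   ISA features beyond those of its caller (say, an integer-only callee and a
   caller built with an FPU) can be inlined, whereas a callee that requires
   extra features, such as an FPU its caller lacks, cannot.  */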
30499
30500 /* Hook to fix a function's alignment when it is affected by a target attribute. */
30501
30502 static void
30503 arm_relayout_function (tree fndecl)
30504 {
30505 if (DECL_USER_ALIGN (fndecl))
30506 return;
30507
30508 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30509
30510 if (!callee_tree)
30511 callee_tree = target_option_default_node;
30512
30513 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30514 SET_DECL_ALIGN
30515 (fndecl,
30516 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30517 }
30518
30519 /* Inner function to process the attribute ((target (...))): take an argument
30520 and set the current options from it. If the argument is a list, recursively
30521 process each element of the list. */
30522
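/* For example, an attribute such as
   __attribute__ ((target ("thumb,fpu=vfpv3-d16"))) is split at the commas
   below; "thumb" and "arm" toggle MASK_THUMB, while "fpu=" selects an entry
   from the FPU table via opt_enum_arg_to_value.  */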
30523 static bool
30524 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30525 {
30526 if (TREE_CODE (args) == TREE_LIST)
30527 {
30528 bool ret = true;
30529
30530 for (; args; args = TREE_CHAIN (args))
30531 if (TREE_VALUE (args)
30532 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30533 ret = false;
30534 return ret;
30535 }
30536
30537 else if (TREE_CODE (args) != STRING_CST)
30538 {
30539 error ("attribute %<target%> argument not a string");
30540 return false;
30541 }
30542
30543 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30544 char *q;
30545
30546 while ((q = strtok (argstr, ",")) != NULL)
30547 {
30548 while (ISSPACE (*q)) ++q;
30549
30550 argstr = NULL;
30551 if (!strncmp (q, "thumb", 5))
30552 opts->x_target_flags |= MASK_THUMB;
30553
30554 else if (!strncmp (q, "arm", 3))
30555 opts->x_target_flags &= ~MASK_THUMB;
30556
30557 else if (!strncmp (q, "fpu=", 4))
30558 {
30559 int fpu_index;
30560 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30561 &fpu_index, CL_TARGET))
30562 {
30563 error ("invalid fpu for attribute(target(\"%s\"))", q);
30564 return false;
30565 }
30566 if (fpu_index == TARGET_FPU_auto)
30567 {
30568 /* This doesn't really make sense until we support
30569 general dynamic selection of the architecture and all
30570 sub-features. */
30571 sorry ("auto fpu selection not currently permitted here");
30572 return false;
30573 }
30574 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30575 }
30576 else
30577 {
30578 error ("attribute(target(\"%s\")) is unknown", q);
30579 return false;
30580 }
30581 }
30582
30583 return true;
30584 }
30585
30586 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30587
30588 tree
30589 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30590 struct gcc_options *opts_set)
30591 {
30592 struct cl_target_option cl_opts;
30593
30594 if (!arm_valid_target_attribute_rec (args, opts))
30595 return NULL_TREE;
30596
30597 cl_target_option_save (&cl_opts, opts);
30598 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30599 arm_option_check_internal (opts);
30600 /* Do any overrides, such as global options arch=xxx. */
30601 arm_option_override_internal (opts, opts_set);
30602
30603 return build_target_option_node (opts);
30604 }
30605
30606 static void
30607 add_attribute (const char * mode, tree *attributes)
30608 {
30609 size_t len = strlen (mode);
30610 tree value = build_string (len, mode);
30611
30612 TREE_TYPE (value) = build_array_type (char_type_node,
30613 build_index_type (size_int (len)));
30614
30615 *attributes = tree_cons (get_identifier ("target"),
30616 build_tree_list (NULL_TREE, value),
30617 *attributes);
30618 }
30619
30620 /* For testing: alternately insert thumb or arm mode attributes on functions. */
30621
30622 static void
30623 arm_insert_attributes (tree fndecl, tree * attributes)
30624 {
30625 const char *mode;
30626
30627 if (! TARGET_FLIP_THUMB)
30628 return;
30629
30630 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30631 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30632 return;
30633
30634 /* Nested definitions must inherit mode. */
30635 if (current_function_decl)
30636 {
30637 mode = TARGET_THUMB ? "thumb" : "arm";
30638 add_attribute (mode, attributes);
30639 return;
30640 }
30641
30642 /* If there is already a setting don't change it. */
30643 if (lookup_attribute ("target", *attributes) != NULL)
30644 return;
30645
30646 mode = thumb_flipper ? "thumb" : "arm";
30647 add_attribute (mode, attributes);
30648
30649 thumb_flipper = !thumb_flipper;
30650 }
30651
30652 /* Hook to validate attribute((target("string"))). */
30653
30654 static bool
30655 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30656 tree args, int ARG_UNUSED (flags))
30657 {
30658 bool ret = true;
30659 struct gcc_options func_options;
30660 tree cur_tree, new_optimize;
30661 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30662
30663 /* Get the optimization options of the current function. */
30664 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30665
30666 /* If the function changed the optimization levels as well as setting target
30667 options, start with the optimizations specified. */
30668 if (!func_optimize)
30669 func_optimize = optimization_default_node;
30670
30671 /* Init func_options. */
30672 memset (&func_options, 0, sizeof (func_options));
30673 init_options_struct (&func_options, NULL);
30674 lang_hooks.init_options_struct (&func_options);
30675
30676 /* Initialize func_options to the defaults. */
30677 cl_optimization_restore (&func_options,
30678 TREE_OPTIMIZATION (func_optimize));
30679
30680 cl_target_option_restore (&func_options,
30681 TREE_TARGET_OPTION (target_option_default_node));
30682
30683 /* Set func_options flags with new target mode. */
30684 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30685 &global_options_set);
30686
30687 if (cur_tree == NULL_TREE)
30688 ret = false;
30689
30690 new_optimize = build_optimization_node (&func_options);
30691
30692 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30693
30694 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30695
30696 finalize_options_struct (&func_options);
30697
30698 return ret;
30699 }
30700
30701 /* Match an ISA feature bitmap to a named FPU. We always use the
30702 first entry that exactly matches the feature set, so that we
30703 effectively canonicalize the FPU name for the assembler. */
30704 static const char*
30705 arm_identify_fpu_from_isa (sbitmap isa)
30706 {
30707 auto_sbitmap fpubits (isa_num_bits);
30708 auto_sbitmap cand_fpubits (isa_num_bits);
30709
30710 bitmap_and (fpubits, isa, isa_all_fpubits);
30711
30712 /* If there are no ISA feature bits relating to the FPU, we must be
30713 doing soft-float. */
30714 if (bitmap_empty_p (fpubits))
30715 return "softvfp";
30716
30717 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30718 {
30719 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30720 if (bitmap_equal_p (fpubits, cand_fpubits))
30721 return all_fpus[i].name;
30722 }
30723 /* We must find an entry, or things have gone wrong. */
30724 gcc_unreachable ();
30725 }
30726
30727 void
30728 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30729 {
30730
30731 fprintf (stream, "\t.syntax unified\n");
30732
30733 if (TARGET_THUMB)
30734 {
30735 if (is_called_in_ARM_mode (decl)
30736 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30737 && cfun->is_thunk))
30738 fprintf (stream, "\t.code 32\n");
30739 else if (TARGET_THUMB1)
30740 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30741 else
30742 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30743 }
30744 else
30745 fprintf (stream, "\t.arm\n");
30746
30747 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30748 (TARGET_SOFT_FLOAT
30749 ? "softvfp"
30750 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30751
30752 if (TARGET_POKE_FUNCTION_NAME)
30753 arm_poke_function_name (stream, (const char *) name);
30754 }
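/* For a Thumb-2 function on a VFP target the directives emitted above look
   roughly like:

	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   with ".arm" or ".code 16"/".code 32" used for the other combinations, and
   ".fpu softvfp" when soft-float is in effect.  */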
30755
30756 /* If MEM is in the form of [base+offset], extract the two parts
30757 of the address and store them in BASE and OFFSET; otherwise clear
30758 BASE and OFFSET and return false. */
30759
30760 static bool
30761 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30762 {
30763 rtx addr;
30764
30765 gcc_assert (MEM_P (mem));
30766
30767 addr = XEXP (mem, 0);
30768
30769 /* Strip off const from addresses like (const (addr)). */
30770 if (GET_CODE (addr) == CONST)
30771 addr = XEXP (addr, 0);
30772
30773 if (GET_CODE (addr) == REG)
30774 {
30775 *base = addr;
30776 *offset = const0_rtx;
30777 return true;
30778 }
30779
30780 if (GET_CODE (addr) == PLUS
30781 && GET_CODE (XEXP (addr, 0)) == REG
30782 && CONST_INT_P (XEXP (addr, 1)))
30783 {
30784 *base = XEXP (addr, 0);
30785 *offset = XEXP (addr, 1);
30786 return true;
30787 }
30788
30789 *base = NULL_RTX;
30790 *offset = NULL_RTX;
30791
30792 return false;
30793 }
30794
30795 /* If INSN is a load or store with an address in the form of [base+offset],
30796 extract the two parts and store them in BASE and OFFSET. Set IS_LOAD
30797 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
30798 otherwise return FALSE. */
30799
30800 static bool
30801 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30802 {
30803 rtx x, dest, src;
30804
30805 gcc_assert (INSN_P (insn));
30806 x = PATTERN (insn);
30807 if (GET_CODE (x) != SET)
30808 return false;
30809
30810 src = SET_SRC (x);
30811 dest = SET_DEST (x);
30812 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30813 {
30814 *is_load = false;
30815 extract_base_offset_in_addr (dest, base, offset);
30816 }
30817 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30818 {
30819 *is_load = true;
30820 extract_base_offset_in_addr (src, base, offset);
30821 }
30822 else
30823 return false;
30824
30825 return (*base != NULL_RTX && *offset != NULL_RTX);
30826 }
30827
30828 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30829
30830 Currently we only support fusing ldr or str instructions, so FUSION_PRI
30831 and PRI are only calculated for these instructions. For other instructions,
30832 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30833 instruction fusion can be supported by returning different priorities.
30834
30835 It's important that irrelevant instructions get the largest FUSION_PRI. */
30836
30837 static void
30838 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30839 int *fusion_pri, int *pri)
30840 {
30841 int tmp, off_val;
30842 bool is_load;
30843 rtx base, offset;
30844
30845 gcc_assert (INSN_P (insn));
30846
30847 tmp = max_pri - 1;
30848 if (!fusion_load_store (insn, &base, &offset, &is_load))
30849 {
30850 *pri = tmp;
30851 *fusion_pri = tmp;
30852 return;
30853 }
30854
30855 /* Load goes first. */
30856 if (is_load)
30857 *fusion_pri = tmp - 1;
30858 else
30859 *fusion_pri = tmp - 2;
30860
30861 tmp /= 2;
30862
30863 /* INSN with smaller base register goes first. */
30864 tmp -= ((REGNO (base) & 0xff) << 20);
30865
30866 /* INSN with smaller offset goes first. */
30867 off_val = (int)(INTVAL (offset));
30868 if (off_val >= 0)
30869 tmp -= (off_val & 0xfffff);
30870 else
30871 tmp += ((- off_val) & 0xfffff);
30872
30873 *pri = tmp;
30874 return;
30875 }
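/* As an illustration of the scheme above, two loads from [r1, #4] and
   [r1, #8] receive the same FUSION_PRI and PRI values that differ only in
   the offset component, so the scheduler will try to place them back to
   back, allowing them to be combined later, e.g. into an ldrd.  */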
30876
30877
30878 /* Construct and return a PARALLEL RTX vector with elements numbering the
30879 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30880 the vector - from the perspective of the architecture. This does not
30881 line up with GCC's perspective on lane numbers, so we end up with
30882 different masks depending on our target endianness. The diagram
30883 below may help. We must draw the distinction when building masks
30884 which select one half of the vector. An instruction selecting
30885 architectural low-lanes for a big-endian target must be described using
30886 a mask selecting GCC high-lanes.
30887
30888 Big-Endian Little-Endian
30889
30890 GCC 0 1 2 3 3 2 1 0
30891 | x | x | x | x | | x | x | x | x |
30892 Architecture 3 2 1 0 3 2 1 0
30893
30894 Low Mask: { 2, 3 } { 0, 1 }
30895 High Mask: { 0, 1 } { 2, 3 }
30896 */
30897
30898 rtx
30899 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30900 {
30901 int nunits = GET_MODE_NUNITS (mode);
30902 rtvec v = rtvec_alloc (nunits / 2);
30903 int high_base = nunits / 2;
30904 int low_base = 0;
30905 int base;
30906 rtx t1;
30907 int i;
30908
30909 if (BYTES_BIG_ENDIAN)
30910 base = high ? low_base : high_base;
30911 else
30912 base = high ? high_base : low_base;
30913
30914 for (i = 0; i < nunits / 2; i++)
30915 RTVEC_ELT (v, i) = GEN_INT (base + i);
30916
30917 t1 = gen_rtx_PARALLEL (mode, v);
30918 return t1;
30919 }
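/* For instance, for V4SImode with HIGH == false this returns a PARALLEL
   selecting lanes { 0, 1 } on a little-endian target and { 2, 3 } on a
   big-endian one, matching the Low Mask row of the diagram above.  */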
30920
30921 /* Check OP for validity as a PARALLEL RTX vector with elements
30922 numbering the lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE)
30923 half of the vector, from the perspective of the architecture. See the
30924 diagram above arm_simd_vect_par_cnst_half for more details. */
30925
30926 bool
30927 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30928 bool high)
30929 {
30930 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30931 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30932 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30933 int i = 0;
30934
30935 if (!VECTOR_MODE_P (mode))
30936 return false;
30937
30938 if (count_op != count_ideal)
30939 return false;
30940
30941 for (i = 0; i < count_ideal; i++)
30942 {
30943 rtx elt_op = XVECEXP (op, 0, i);
30944 rtx elt_ideal = XVECEXP (ideal, 0, i);
30945
30946 if (!CONST_INT_P (elt_op)
30947 || INTVAL (elt_ideal) != INTVAL (elt_op))
30948 return false;
30949 }
30950 return true;
30951 }
30952
30953 /* We can output an mi_thunk in all cases except for a non-zero vcall_offset
30954 in Thumb-1. */
30955 static bool
30956 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30957 const_tree)
30958 {
30959 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30960 if (vcall_offset && TARGET_THUMB1)
30961 return false;
30962
30963 /* Otherwise ok. */
30964 return true;
30965 }
30966
30967 /* Generate RTL for a conditional branch with rtx comparison CODE in
30968 mode CC_MODE. The destination of the unlikely conditional branch
30969 is LABEL_REF. */
30970
30971 void
30972 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30973 rtx label_ref)
30974 {
30975 rtx x;
30976 x = gen_rtx_fmt_ee (code, VOIDmode,
30977 gen_rtx_REG (cc_mode, CC_REGNUM),
30978 const0_rtx);
30979
30980 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30981 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30982 pc_rtx);
30983 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30984 }
30985
30986 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30987
30988 For pure-code sections there is no letter code for this attribute, so
30989 output all the section flags numerically when this is needed. */
30990
30991 static bool
30992 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30993 {
30994
30995 if (flags & SECTION_ARM_PURECODE)
30996 {
30997 *num = 0x20000000;
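/* 0x20000000 is SHF_ARM_PURECODE; the bits ORed in below are the generic
   ELF section flags: SHF_ALLOC (0x2), SHF_EXCLUDE (0x80000000),
   SHF_WRITE (0x1), SHF_EXECINSTR (0x4), SHF_MERGE (0x10),
   SHF_STRINGS (0x20), SHF_TLS (0x400) and SHF_GROUP (0x200).  */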
30998
30999 if (!(flags & SECTION_DEBUG))
31000 *num |= 0x2;
31001 if (flags & SECTION_EXCLUDE)
31002 *num |= 0x80000000;
31003 if (flags & SECTION_WRITE)
31004 *num |= 0x1;
31005 if (flags & SECTION_CODE)
31006 *num |= 0x4;
31007 if (flags & SECTION_MERGE)
31008 *num |= 0x10;
31009 if (flags & SECTION_STRINGS)
31010 *num |= 0x20;
31011 if (flags & SECTION_TLS)
31012 *num |= 0x400;
31013 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31014 *num |= 0x200;
31015
31016 return true;
31017 }
31018
31019 return false;
31020 }
31021
31022 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31023
31024 If pure-code is passed as an option, make sure all functions are in
31025 sections that have the SHF_ARM_PURECODE attribute. */
31026
31027 static section *
31028 arm_function_section (tree decl, enum node_frequency freq,
31029 bool startup, bool exit)
31030 {
31031 const char * section_name;
31032 section * sec;
31033
31034 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31035 return default_function_section (decl, freq, startup, exit);
31036
31037 if (!target_pure_code)
31038 return default_function_section (decl, freq, startup, exit);
31039
31040
31041 section_name = DECL_SECTION_NAME (decl);
31042
31043 /* If a function is not in a named section then it falls under the 'default'
31044 text section, also known as '.text'. We can preserve previous behavior as
31045 the default text section already has the SHF_ARM_PURECODE section
31046 attribute. */
31047 if (!section_name)
31048 {
31049 section *default_sec = default_function_section (decl, freq, startup,
31050 exit);
31051
31052 /* If default_sec is not null, then it must be a special section like for
31053 example .text.startup. We set the pure-code attribute and return the
31054 same section to preserve existing behavior. */
31055 if (default_sec)
31056 default_sec->common.flags |= SECTION_ARM_PURECODE;
31057 return default_sec;
31058 }
31059
31060 /* Otherwise look whether a section has already been created with
31061 'section_name'. */
31062 sec = get_named_section (decl, section_name, 0);
31063 if (!sec)
31064 /* If that is not the case, passing NULL as the section's name to
31065 'get_named_section' will create a section with the declaration's
31066 section name. */
31067 sec = get_named_section (decl, NULL, 0);
31068
31069 /* Set the SHF_ARM_PURECODE attribute. */
31070 sec->common.flags |= SECTION_ARM_PURECODE;
31071
31072 return sec;
31073 }
31074
31075 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31076
31077 If DECL is a function declaration and pure-code is passed as an option
31078 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31079 section's name and RELOC indicates whether the declaration's initializer may
31080 contain runtime relocations. */
31081
31082 static unsigned int
31083 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31084 {
31085 unsigned int flags = default_section_type_flags (decl, name, reloc);
31086
31087 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31088 flags |= SECTION_ARM_PURECODE;
31089
31090 return flags;
31091 }
31092
31093 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31094
31095 static void
31096 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31097 rtx op0, rtx op1,
31098 rtx *quot_p, rtx *rem_p)
31099 {
31100 if (mode == SImode)
31101 gcc_assert (!TARGET_IDIV);
31102
31103 scalar_int_mode libval_mode
31104 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31105
31106 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31107 libval_mode,
31108 op0, GET_MODE (op0),
31109 op1, GET_MODE (op1));
31110
31111 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31112 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31113 GET_MODE_SIZE (mode));
31114
31115 gcc_assert (quotient);
31116 gcc_assert (remainder);
31117
31118 *quot_p = quotient;
31119 *rem_p = remainder;
31120 }
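/* The AEABI divmod helpers return the quotient and remainder packed into a
   single double-width value (for SImode, quotient in r0 and remainder in r1),
   which is why the result is extracted with the two subregs above.  */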
31121
31122 /* This function checks for the availability of the coprocessor builtin passed
31123 in BUILTIN for the current target. Returns true if it is available and
31124 false otherwise. If a BUILTIN is passed for which this function has not
31125 been implemented it will cause an internal compiler error. */
31126
31127 bool
31128 arm_coproc_builtin_available (enum unspecv builtin)
31129 {
31130 /* None of these builtins are available in Thumb mode if the target only
31131 supports Thumb-1. */
31132 if (TARGET_THUMB1)
31133 return false;
31134
31135 switch (builtin)
31136 {
31137 case VUNSPEC_CDP:
31138 case VUNSPEC_LDC:
31139 case VUNSPEC_LDCL:
31140 case VUNSPEC_STC:
31141 case VUNSPEC_STCL:
31142 case VUNSPEC_MCR:
31143 case VUNSPEC_MRC:
31144 if (arm_arch4)
31145 return true;
31146 break;
31147 case VUNSPEC_CDP2:
31148 case VUNSPEC_LDC2:
31149 case VUNSPEC_LDC2L:
31150 case VUNSPEC_STC2:
31151 case VUNSPEC_STC2L:
31152 case VUNSPEC_MCR2:
31153 case VUNSPEC_MRC2:
31154 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31155 ARMv8-{A,M}. */
31156 if (arm_arch5)
31157 return true;
31158 break;
31159 case VUNSPEC_MCRR:
31160 case VUNSPEC_MRRC:
31161 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31162 ARMv8-{A,M}. */
31163 if (arm_arch6 || arm_arch5te)
31164 return true;
31165 break;
31166 case VUNSPEC_MCRR2:
31167 case VUNSPEC_MRRC2:
31168 if (arm_arch6)
31169 return true;
31170 break;
31171 default:
31172 gcc_unreachable ();
31173 }
31174 return false;
31175 }
31176
31177 /* This function returns true if OP is a valid memory operand for the ldc and
31178 stc coprocessor instructions and false otherwise. */
31179
31180 bool
31181 arm_coproc_ldc_stc_legitimate_address (rtx op)
31182 {
31183 HOST_WIDE_INT range;
31184 /* Has to be a memory operand. */
31185 if (!MEM_P (op))
31186 return false;
31187
31188 op = XEXP (op, 0);
31189
31190 /* We accept registers. */
31191 if (REG_P (op))
31192 return true;
31193
31194 switch (GET_CODE (op))
31195 {
31196 case PLUS:
31197 {
31198 /* Or registers with an offset. */
31199 if (!REG_P (XEXP (op, 0)))
31200 return false;
31201
31202 op = XEXP (op, 1);
31203
31204 /* The offset must be an immediate though. */
31205 if (!CONST_INT_P (op))
31206 return false;
31207
31208 range = INTVAL (op);
31209
31210 /* Within the range of [-1020,1020]. */
31211 if (!IN_RANGE (range, -1020, 1020))
31212 return false;
31213
31214 /* And a multiple of 4. */
31215 return (range % 4) == 0;
31216 }
31217 case PRE_INC:
31218 case POST_INC:
31219 case PRE_DEC:
31220 case POST_DEC:
31221 return REG_P (XEXP (op, 0));
31222 default:
31223 gcc_unreachable ();
31224 }
31225 return false;
31226 }
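/* So, for example, addresses such as [r0], [r0, #8] and [r0, #-1020] are
   accepted, as are pre/post increment and decrement forms on a plain
   register, while [r0, #2] (not a multiple of 4) and [r0, #1024] (out of
   range) are rejected.  */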
31227
31228 #if CHECKING_P
31229 namespace selftest {
31230
31231 /* Scan the static data tables generated by parsecpu.awk looking for
31232 potential issues with the data. We primarily check for
31233 inconsistencies in the option extensions at present (extensions
31234 that duplicate others but aren't marked as aliases). Furthermore,
31235 for correct canonicalization later options must never be a subset
31236 of an earlier option. Any extension should also only specify other
31237 feature bits and never an architecture bit. The architecture is inferred
31238 from the declaration of the extension. */
31239 static void
31240 arm_test_cpu_arch_data (void)
31241 {
31242 const arch_option *arch;
31243 const cpu_option *cpu;
31244 auto_sbitmap target_isa (isa_num_bits);
31245 auto_sbitmap isa1 (isa_num_bits);
31246 auto_sbitmap isa2 (isa_num_bits);
31247
31248 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31249 {
31250 const cpu_arch_extension *ext1, *ext2;
31251
31252 if (arch->common.extensions == NULL)
31253 continue;
31254
31255 arm_initialize_isa (target_isa, arch->common.isa_bits);
31256
31257 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31258 {
31259 if (ext1->alias)
31260 continue;
31261
31262 arm_initialize_isa (isa1, ext1->isa_bits);
31263 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31264 {
31265 if (ext2->alias || ext1->remove != ext2->remove)
31266 continue;
31267
31268 arm_initialize_isa (isa2, ext2->isa_bits);
31269 /* If the option is a subset of an earlier option, it doesn't
31270 add anything and so isn't useful. */
31271 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31272
31273 /* If the extension specifies any architectural bits then
31274 disallow it. Extensions should only specify feature bits. */
31275 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31276 }
31277 }
31278 }
31279
31280 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31281 {
31282 const cpu_arch_extension *ext1, *ext2;
31283
31284 if (cpu->common.extensions == NULL)
31285 continue;
31286
31287 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31288
31289 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31290 {
31291 if (ext1->alias)
31292 continue;
31293
31294 arm_initialize_isa (isa1, ext1->isa_bits);
31295 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31296 {
31297 if (ext2->alias || ext1->remove != ext2->remove)
31298 continue;
31299
31300 arm_initialize_isa (isa2, ext2->isa_bits);
31301 /* If the option is a subset of an earlier option, it doesn't
31302 add anything and so isn't useful. */
31303 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31304
31305 /* If the extension specifies any architectural bits then
31306 disallow it. Extensions should only specify feature bits. */
31307 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31308 }
31309 }
31310 }
31311 }
31312
31313 static void
31314 arm_run_selftests (void)
31315 {
31316 arm_test_cpu_arch_data ();
31317 }
31318 } /* Namespace selftest. */
31319
31320 #undef TARGET_RUN_TARGET_SELFTESTS
31321 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31322 #endif /* CHECKING_P */
31323
31324 struct gcc_target targetm = TARGET_INITIALIZER;
31325
31326 #include "gt-arm.h"