1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
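/* Editorial note, not part of upstream arm.c: a minimal sketch of how the
   attributes registered in the table above are spelled in user code. The
   __attribute__ forms follow the "ARM Function Attributes" section of the
   GCC manual; the function names are hypothetical.

     extern void far_fn (void) __attribute__ ((long_call));
     extern void near_fn (void) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double x) __attribute__ ((pcs ("aapcs-vfp")));
     void gateway (void) __attribute__ ((cmse_nonsecure_entry));  */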
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
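/* Editorial check: with the two bounds above, the block addressed from one
   anchor covers offsets -4088 through +4095, i.e. 4088 + 1 + 4095 = 8184
   bytes, and 8184 / 8 = 1023 with no remainder, which is the divisibility
   property the comment above relies on.  */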
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
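/* Editorial note: arm_restrict_it corresponds to the -mrestrict-it option
   (the ARMv8 "restricted IT" rules), under which an IT block may cover only
   a single conditional instruction; otherwise up to four instructions may
   share one IT block.  */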
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
914 /* Nonzero if tuning for XScale.  */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack; it's intended to help work around
927 problems in GLD, which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the ARMv8-M security extensions. */
975 int arm_arch_cmse = 0;
976
977 /* Nonzero if the core has a very small, high-latency multiply unit. */
978 int arm_m_profile_small_mul = 0;
979
980 /* The condition codes of the ARM, and the inverse function. */
981 static const char * const arm_condition_codes[] =
982 {
983 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
984 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
985 };
986
987 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
988 int arm_regs_in_sequence[] =
989 {
990 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
991 };
992
993 #define ARM_LSL_NAME "lsl"
994 #define streq(string1, string2) (strcmp (string1, string2) == 0)
995
996 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
997 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
998 | (1 << PIC_OFFSET_TABLE_REGNUM)))
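/* Editorial note: the THUMB2_WORK_REGS mask starts from the low registers
   r0-r7 (0xff) and then clears the bits for the Thumb hard frame pointer,
   SP, PC and the PIC register, leaving a pool of candidate work/scratch
   registers.  */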
999 \f
1000 /* Initialization code. */
1001
1002 struct cpu_tune
1003 {
1004 enum processor_type scheduler;
1005 unsigned int tune_flags;
1006 const struct tune_params *tune;
1007 };
1008
1009 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1010 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1011 { \
1012 num_slots, \
1013 l1_size, \
1014 l1_line_size \
1015 }
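/* Editorial note, illustrative only: the tune_params initializers later in
   this file plug one of these into their prefetch fields, either
   ARM_PREFETCH_NOT_BENEFICIAL or, for a core that benefits from software
   prefetching, something along the lines of
     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   giving num_slots = 4, l1_size = 32768 and l1_line_size = 64; the numbers
   shown here are hypothetical.  */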
1016
1017 /* arm generic vectorizer costs. */
1018 static const
1019 struct cpu_vec_costs arm_default_vec_cost = {
1020 1, /* scalar_stmt_cost. */
1021 1, /* scalar_load_cost. */
1022 1, /* scalar_store_cost. */
1023 1, /* vec_stmt_cost. */
1024 1, /* vec_to_scalar_cost. */
1025 1, /* scalar_to_vec_cost. */
1026 1, /* vec_align_load_cost. */
1027 1, /* vec_unalign_load_cost. */
1028 1, /* vec_unalign_store_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1035 #include "aarch-cost-tables.h"
1036
1037
1038
1039 const struct cpu_cost_table cortexa9_extra_costs =
1040 {
1041 /* ALU */
1042 {
1043 0, /* arith. */
1044 0, /* logical. */
1045 0, /* shift. */
1046 COSTS_N_INSNS (1), /* shift_reg. */
1047 COSTS_N_INSNS (1), /* arith_shift. */
1048 COSTS_N_INSNS (2), /* arith_shift_reg. */
1049 0, /* log_shift. */
1050 COSTS_N_INSNS (1), /* log_shift_reg. */
1051 COSTS_N_INSNS (1), /* extend. */
1052 COSTS_N_INSNS (2), /* extend_arith. */
1053 COSTS_N_INSNS (1), /* bfi. */
1054 COSTS_N_INSNS (1), /* bfx. */
1055 0, /* clz. */
1056 0, /* rev. */
1057 0, /* non_exec. */
1058 true /* non_exec_costs_exec. */
1059 },
1060 {
1061 /* MULT SImode */
1062 {
1063 COSTS_N_INSNS (3), /* simple. */
1064 COSTS_N_INSNS (3), /* flag_setting. */
1065 COSTS_N_INSNS (2), /* extend. */
1066 COSTS_N_INSNS (3), /* add. */
1067 COSTS_N_INSNS (2), /* extend_add. */
1068 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1069 },
1070 /* MULT DImode */
1071 {
1072 0, /* simple (N/A). */
1073 0, /* flag_setting (N/A). */
1074 COSTS_N_INSNS (4), /* extend. */
1075 0, /* add (N/A). */
1076 COSTS_N_INSNS (4), /* extend_add. */
1077 0 /* idiv (N/A). */
1078 }
1079 },
1080 /* LD/ST */
1081 {
1082 COSTS_N_INSNS (2), /* load. */
1083 COSTS_N_INSNS (2), /* load_sign_extend. */
1084 COSTS_N_INSNS (2), /* ldrd. */
1085 COSTS_N_INSNS (2), /* ldm_1st. */
1086 1, /* ldm_regs_per_insn_1st. */
1087 2, /* ldm_regs_per_insn_subsequent. */
1088 COSTS_N_INSNS (5), /* loadf. */
1089 COSTS_N_INSNS (5), /* loadd. */
1090 COSTS_N_INSNS (1), /* load_unaligned. */
1091 COSTS_N_INSNS (2), /* store. */
1092 COSTS_N_INSNS (2), /* strd. */
1093 COSTS_N_INSNS (2), /* stm_1st. */
1094 1, /* stm_regs_per_insn_1st. */
1095 2, /* stm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (1), /* storef. */
1097 COSTS_N_INSNS (1), /* stored. */
1098 COSTS_N_INSNS (1), /* store_unaligned. */
1099 COSTS_N_INSNS (1), /* loadv. */
1100 COSTS_N_INSNS (1) /* storev. */
1101 },
1102 {
1103 /* FP SFmode */
1104 {
1105 COSTS_N_INSNS (14), /* div. */
1106 COSTS_N_INSNS (4), /* mult. */
1107 COSTS_N_INSNS (7), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (3), /* addsub. */
1110 COSTS_N_INSNS (1), /* fpconst. */
1111 COSTS_N_INSNS (1), /* neg. */
1112 COSTS_N_INSNS (3), /* compare. */
1113 COSTS_N_INSNS (3), /* widen. */
1114 COSTS_N_INSNS (3), /* narrow. */
1115 COSTS_N_INSNS (3), /* toint. */
1116 COSTS_N_INSNS (3), /* fromint. */
1117 COSTS_N_INSNS (3) /* roundint. */
1118 },
1119 /* FP DFmode */
1120 {
1121 COSTS_N_INSNS (24), /* div. */
1122 COSTS_N_INSNS (5), /* mult. */
1123 COSTS_N_INSNS (8), /* mult_addsub. */
1124 COSTS_N_INSNS (30), /* fma. */
1125 COSTS_N_INSNS (3), /* addsub. */
1126 COSTS_N_INSNS (1), /* fpconst. */
1127 COSTS_N_INSNS (1), /* neg. */
1128 COSTS_N_INSNS (3), /* compare. */
1129 COSTS_N_INSNS (3), /* widen. */
1130 COSTS_N_INSNS (3), /* narrow. */
1131 COSTS_N_INSNS (3), /* toint. */
1132 COSTS_N_INSNS (3), /* fromint. */
1133 COSTS_N_INSNS (3) /* roundint. */
1134 }
1135 },
1136 /* Vector */
1137 {
1138 COSTS_N_INSNS (1) /* alu. */
1139 }
1140 };
1141
1142 const struct cpu_cost_table cortexa8_extra_costs =
1143 {
1144 /* ALU */
1145 {
1146 0, /* arith. */
1147 0, /* logical. */
1148 COSTS_N_INSNS (1), /* shift. */
1149 0, /* shift_reg. */
1150 COSTS_N_INSNS (1), /* arith_shift. */
1151 0, /* arith_shift_reg. */
1152 COSTS_N_INSNS (1), /* log_shift. */
1153 0, /* log_shift_reg. */
1154 0, /* extend. */
1155 0, /* extend_arith. */
1156 0, /* bfi. */
1157 0, /* bfx. */
1158 0, /* clz. */
1159 0, /* rev. */
1160 0, /* non_exec. */
1161 true /* non_exec_costs_exec. */
1162 },
1163 {
1164 /* MULT SImode */
1165 {
1166 COSTS_N_INSNS (1), /* simple. */
1167 COSTS_N_INSNS (1), /* flag_setting. */
1168 COSTS_N_INSNS (1), /* extend. */
1169 COSTS_N_INSNS (1), /* add. */
1170 COSTS_N_INSNS (1), /* extend_add. */
1171 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1172 },
1173 /* MULT DImode */
1174 {
1175 0, /* simple (N/A). */
1176 0, /* flag_setting (N/A). */
1177 COSTS_N_INSNS (2), /* extend. */
1178 0, /* add (N/A). */
1179 COSTS_N_INSNS (2), /* extend_add. */
1180 0 /* idiv (N/A). */
1181 }
1182 },
1183 /* LD/ST */
1184 {
1185 COSTS_N_INSNS (1), /* load. */
1186 COSTS_N_INSNS (1), /* load_sign_extend. */
1187 COSTS_N_INSNS (1), /* ldrd. */
1188 COSTS_N_INSNS (1), /* ldm_1st. */
1189 1, /* ldm_regs_per_insn_1st. */
1190 2, /* ldm_regs_per_insn_subsequent. */
1191 COSTS_N_INSNS (1), /* loadf. */
1192 COSTS_N_INSNS (1), /* loadd. */
1193 COSTS_N_INSNS (1), /* load_unaligned. */
1194 COSTS_N_INSNS (1), /* store. */
1195 COSTS_N_INSNS (1), /* strd. */
1196 COSTS_N_INSNS (1), /* stm_1st. */
1197 1, /* stm_regs_per_insn_1st. */
1198 2, /* stm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* storef. */
1200 COSTS_N_INSNS (1), /* stored. */
1201 COSTS_N_INSNS (1), /* store_unaligned. */
1202 COSTS_N_INSNS (1), /* loadv. */
1203 COSTS_N_INSNS (1) /* storev. */
1204 },
1205 {
1206 /* FP SFmode */
1207 {
1208 COSTS_N_INSNS (36), /* div. */
1209 COSTS_N_INSNS (11), /* mult. */
1210 COSTS_N_INSNS (20), /* mult_addsub. */
1211 COSTS_N_INSNS (30), /* fma. */
1212 COSTS_N_INSNS (9), /* addsub. */
1213 COSTS_N_INSNS (3), /* fpconst. */
1214 COSTS_N_INSNS (3), /* neg. */
1215 COSTS_N_INSNS (6), /* compare. */
1216 COSTS_N_INSNS (4), /* widen. */
1217 COSTS_N_INSNS (4), /* narrow. */
1218 COSTS_N_INSNS (8), /* toint. */
1219 COSTS_N_INSNS (8), /* fromint. */
1220 COSTS_N_INSNS (8) /* roundint. */
1221 },
1222 /* FP DFmode */
1223 {
1224 COSTS_N_INSNS (64), /* div. */
1225 COSTS_N_INSNS (16), /* mult. */
1226 COSTS_N_INSNS (25), /* mult_addsub. */
1227 COSTS_N_INSNS (30), /* fma. */
1228 COSTS_N_INSNS (9), /* addsub. */
1229 COSTS_N_INSNS (3), /* fpconst. */
1230 COSTS_N_INSNS (3), /* neg. */
1231 COSTS_N_INSNS (6), /* compare. */
1232 COSTS_N_INSNS (6), /* widen. */
1233 COSTS_N_INSNS (6), /* narrow. */
1234 COSTS_N_INSNS (8), /* toint. */
1235 COSTS_N_INSNS (8), /* fromint. */
1236 COSTS_N_INSNS (8) /* roundint. */
1237 }
1238 },
1239 /* Vector */
1240 {
1241 COSTS_N_INSNS (1) /* alu. */
1242 }
1243 };
1244
1245 const struct cpu_cost_table cortexa5_extra_costs =
1246 {
1247 /* ALU */
1248 {
1249 0, /* arith. */
1250 0, /* logical. */
1251 COSTS_N_INSNS (1), /* shift. */
1252 COSTS_N_INSNS (1), /* shift_reg. */
1253 COSTS_N_INSNS (1), /* arith_shift. */
1254 COSTS_N_INSNS (1), /* arith_shift_reg. */
1255 COSTS_N_INSNS (1), /* log_shift. */
1256 COSTS_N_INSNS (1), /* log_shift_reg. */
1257 COSTS_N_INSNS (1), /* extend. */
1258 COSTS_N_INSNS (1), /* extend_arith. */
1259 COSTS_N_INSNS (1), /* bfi. */
1260 COSTS_N_INSNS (1), /* bfx. */
1261 COSTS_N_INSNS (1), /* clz. */
1262 COSTS_N_INSNS (1), /* rev. */
1263 0, /* non_exec. */
1264 true /* non_exec_costs_exec. */
1265 },
1266
1267 {
1268 /* MULT SImode */
1269 {
1270 0, /* simple. */
1271 COSTS_N_INSNS (1), /* flag_setting. */
1272 COSTS_N_INSNS (1), /* extend. */
1273 COSTS_N_INSNS (1), /* add. */
1274 COSTS_N_INSNS (1), /* extend_add. */
1275 COSTS_N_INSNS (7) /* idiv. */
1276 },
1277 /* MULT DImode */
1278 {
1279 0, /* simple (N/A). */
1280 0, /* flag_setting (N/A). */
1281 COSTS_N_INSNS (1), /* extend. */
1282 0, /* add. */
1283 COSTS_N_INSNS (2), /* extend_add. */
1284 0 /* idiv (N/A). */
1285 }
1286 },
1287 /* LD/ST */
1288 {
1289 COSTS_N_INSNS (1), /* load. */
1290 COSTS_N_INSNS (1), /* load_sign_extend. */
1291 COSTS_N_INSNS (6), /* ldrd. */
1292 COSTS_N_INSNS (1), /* ldm_1st. */
1293 1, /* ldm_regs_per_insn_1st. */
1294 2, /* ldm_regs_per_insn_subsequent. */
1295 COSTS_N_INSNS (2), /* loadf. */
1296 COSTS_N_INSNS (4), /* loadd. */
1297 COSTS_N_INSNS (1), /* load_unaligned. */
1298 COSTS_N_INSNS (1), /* store. */
1299 COSTS_N_INSNS (3), /* strd. */
1300 COSTS_N_INSNS (1), /* stm_1st. */
1301 1, /* stm_regs_per_insn_1st. */
1302 2, /* stm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* storef. */
1304 COSTS_N_INSNS (2), /* stored. */
1305 COSTS_N_INSNS (1), /* store_unaligned. */
1306 COSTS_N_INSNS (1), /* loadv. */
1307 COSTS_N_INSNS (1) /* storev. */
1308 },
1309 {
1310 /* FP SFmode */
1311 {
1312 COSTS_N_INSNS (15), /* div. */
1313 COSTS_N_INSNS (3), /* mult. */
1314 COSTS_N_INSNS (7), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1325 },
1326 /* FP DFmode */
1327 {
1328 COSTS_N_INSNS (30), /* div. */
1329 COSTS_N_INSNS (6), /* mult. */
1330 COSTS_N_INSNS (10), /* mult_addsub. */
1331 COSTS_N_INSNS (7), /* fma. */
1332 COSTS_N_INSNS (3), /* addsub. */
1333 COSTS_N_INSNS (3), /* fpconst. */
1334 COSTS_N_INSNS (3), /* neg. */
1335 COSTS_N_INSNS (3), /* compare. */
1336 COSTS_N_INSNS (3), /* widen. */
1337 COSTS_N_INSNS (3), /* narrow. */
1338 COSTS_N_INSNS (3), /* toint. */
1339 COSTS_N_INSNS (3), /* fromint. */
1340 COSTS_N_INSNS (3) /* roundint. */
1341 }
1342 },
1343 /* Vector */
1344 {
1345 COSTS_N_INSNS (1) /* alu. */
1346 }
1347 };
1348
1349
1350 const struct cpu_cost_table cortexa7_extra_costs =
1351 {
1352 /* ALU */
1353 {
1354 0, /* arith. */
1355 0, /* logical. */
1356 COSTS_N_INSNS (1), /* shift. */
1357 COSTS_N_INSNS (1), /* shift_reg. */
1358 COSTS_N_INSNS (1), /* arith_shift. */
1359 COSTS_N_INSNS (1), /* arith_shift_reg. */
1360 COSTS_N_INSNS (1), /* log_shift. */
1361 COSTS_N_INSNS (1), /* log_shift_reg. */
1362 COSTS_N_INSNS (1), /* extend. */
1363 COSTS_N_INSNS (1), /* extend_arith. */
1364 COSTS_N_INSNS (1), /* bfi. */
1365 COSTS_N_INSNS (1), /* bfx. */
1366 COSTS_N_INSNS (1), /* clz. */
1367 COSTS_N_INSNS (1), /* rev. */
1368 0, /* non_exec. */
1369 true /* non_exec_costs_exec. */
1370 },
1371
1372 {
1373 /* MULT SImode */
1374 {
1375 0, /* simple. */
1376 COSTS_N_INSNS (1), /* flag_setting. */
1377 COSTS_N_INSNS (1), /* extend. */
1378 COSTS_N_INSNS (1), /* add. */
1379 COSTS_N_INSNS (1), /* extend_add. */
1380 COSTS_N_INSNS (7) /* idiv. */
1381 },
1382 /* MULT DImode */
1383 {
1384 0, /* simple (N/A). */
1385 0, /* flag_setting (N/A). */
1386 COSTS_N_INSNS (1), /* extend. */
1387 0, /* add. */
1388 COSTS_N_INSNS (2), /* extend_add. */
1389 0 /* idiv (N/A). */
1390 }
1391 },
1392 /* LD/ST */
1393 {
1394 COSTS_N_INSNS (1), /* load. */
1395 COSTS_N_INSNS (1), /* load_sign_extend. */
1396 COSTS_N_INSNS (3), /* ldrd. */
1397 COSTS_N_INSNS (1), /* ldm_1st. */
1398 1, /* ldm_regs_per_insn_1st. */
1399 2, /* ldm_regs_per_insn_subsequent. */
1400 COSTS_N_INSNS (2), /* loadf. */
1401 COSTS_N_INSNS (2), /* loadd. */
1402 COSTS_N_INSNS (1), /* load_unaligned. */
1403 COSTS_N_INSNS (1), /* store. */
1404 COSTS_N_INSNS (3), /* strd. */
1405 COSTS_N_INSNS (1), /* stm_1st. */
1406 1, /* stm_regs_per_insn_1st. */
1407 2, /* stm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* storef. */
1409 COSTS_N_INSNS (2), /* stored. */
1410 COSTS_N_INSNS (1), /* store_unaligned. */
1411 COSTS_N_INSNS (1), /* loadv. */
1412 COSTS_N_INSNS (1) /* storev. */
1413 },
1414 {
1415 /* FP SFmode */
1416 {
1417 COSTS_N_INSNS (15), /* div. */
1418 COSTS_N_INSNS (3), /* mult. */
1419 COSTS_N_INSNS (7), /* mult_addsub. */
1420 COSTS_N_INSNS (7), /* fma. */
1421 COSTS_N_INSNS (3), /* addsub. */
1422 COSTS_N_INSNS (3), /* fpconst. */
1423 COSTS_N_INSNS (3), /* neg. */
1424 COSTS_N_INSNS (3), /* compare. */
1425 COSTS_N_INSNS (3), /* widen. */
1426 COSTS_N_INSNS (3), /* narrow. */
1427 COSTS_N_INSNS (3), /* toint. */
1428 COSTS_N_INSNS (3), /* fromint. */
1429 COSTS_N_INSNS (3) /* roundint. */
1430 },
1431 /* FP DFmode */
1432 {
1433 COSTS_N_INSNS (30), /* div. */
1434 COSTS_N_INSNS (6), /* mult. */
1435 COSTS_N_INSNS (10), /* mult_addsub. */
1436 COSTS_N_INSNS (7), /* fma. */
1437 COSTS_N_INSNS (3), /* addsub. */
1438 COSTS_N_INSNS (3), /* fpconst. */
1439 COSTS_N_INSNS (3), /* neg. */
1440 COSTS_N_INSNS (3), /* compare. */
1441 COSTS_N_INSNS (3), /* widen. */
1442 COSTS_N_INSNS (3), /* narrow. */
1443 COSTS_N_INSNS (3), /* toint. */
1444 COSTS_N_INSNS (3), /* fromint. */
1445 COSTS_N_INSNS (3) /* roundint. */
1446 }
1447 },
1448 /* Vector */
1449 {
1450 COSTS_N_INSNS (1) /* alu. */
1451 }
1452 };
1453
1454 const struct cpu_cost_table cortexa12_extra_costs =
1455 {
1456 /* ALU */
1457 {
1458 0, /* arith. */
1459 0, /* logical. */
1460 0, /* shift. */
1461 COSTS_N_INSNS (1), /* shift_reg. */
1462 COSTS_N_INSNS (1), /* arith_shift. */
1463 COSTS_N_INSNS (1), /* arith_shift_reg. */
1464 COSTS_N_INSNS (1), /* log_shift. */
1465 COSTS_N_INSNS (1), /* log_shift_reg. */
1466 0, /* extend. */
1467 COSTS_N_INSNS (1), /* extend_arith. */
1468 0, /* bfi. */
1469 COSTS_N_INSNS (1), /* bfx. */
1470 COSTS_N_INSNS (1), /* clz. */
1471 COSTS_N_INSNS (1), /* rev. */
1472 0, /* non_exec. */
1473 true /* non_exec_costs_exec. */
1474 },
1475 /* MULT SImode */
1476 {
1477 {
1478 COSTS_N_INSNS (2), /* simple. */
1479 COSTS_N_INSNS (3), /* flag_setting. */
1480 COSTS_N_INSNS (2), /* extend. */
1481 COSTS_N_INSNS (3), /* add. */
1482 COSTS_N_INSNS (2), /* extend_add. */
1483 COSTS_N_INSNS (18) /* idiv. */
1484 },
1485 /* MULT DImode */
1486 {
1487 0, /* simple (N/A). */
1488 0, /* flag_setting (N/A). */
1489 COSTS_N_INSNS (3), /* extend. */
1490 0, /* add (N/A). */
1491 COSTS_N_INSNS (3), /* extend_add. */
1492 0 /* idiv (N/A). */
1493 }
1494 },
1495 /* LD/ST */
1496 {
1497 COSTS_N_INSNS (3), /* load. */
1498 COSTS_N_INSNS (3), /* load_sign_extend. */
1499 COSTS_N_INSNS (3), /* ldrd. */
1500 COSTS_N_INSNS (3), /* ldm_1st. */
1501 1, /* ldm_regs_per_insn_1st. */
1502 2, /* ldm_regs_per_insn_subsequent. */
1503 COSTS_N_INSNS (3), /* loadf. */
1504 COSTS_N_INSNS (3), /* loadd. */
1505 0, /* load_unaligned. */
1506 0, /* store. */
1507 0, /* strd. */
1508 0, /* stm_1st. */
1509 1, /* stm_regs_per_insn_1st. */
1510 2, /* stm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (2), /* storef. */
1512 COSTS_N_INSNS (2), /* stored. */
1513 0, /* store_unaligned. */
1514 COSTS_N_INSNS (1), /* loadv. */
1515 COSTS_N_INSNS (1) /* storev. */
1516 },
1517 {
1518 /* FP SFmode */
1519 {
1520 COSTS_N_INSNS (17), /* div. */
1521 COSTS_N_INSNS (4), /* mult. */
1522 COSTS_N_INSNS (8), /* mult_addsub. */
1523 COSTS_N_INSNS (8), /* fma. */
1524 COSTS_N_INSNS (4), /* addsub. */
1525 COSTS_N_INSNS (2), /* fpconst. */
1526 COSTS_N_INSNS (2), /* neg. */
1527 COSTS_N_INSNS (2), /* compare. */
1528 COSTS_N_INSNS (4), /* widen. */
1529 COSTS_N_INSNS (4), /* narrow. */
1530 COSTS_N_INSNS (4), /* toint. */
1531 COSTS_N_INSNS (4), /* fromint. */
1532 COSTS_N_INSNS (4) /* roundint. */
1533 },
1534 /* FP DFmode */
1535 {
1536 COSTS_N_INSNS (31), /* div. */
1537 COSTS_N_INSNS (4), /* mult. */
1538 COSTS_N_INSNS (8), /* mult_addsub. */
1539 COSTS_N_INSNS (8), /* fma. */
1540 COSTS_N_INSNS (4), /* addsub. */
1541 COSTS_N_INSNS (2), /* fpconst. */
1542 COSTS_N_INSNS (2), /* neg. */
1543 COSTS_N_INSNS (2), /* compare. */
1544 COSTS_N_INSNS (4), /* widen. */
1545 COSTS_N_INSNS (4), /* narrow. */
1546 COSTS_N_INSNS (4), /* toint. */
1547 COSTS_N_INSNS (4), /* fromint. */
1548 COSTS_N_INSNS (4) /* roundint. */
1549 }
1550 },
1551 /* Vector */
1552 {
1553 COSTS_N_INSNS (1) /* alu. */
1554 }
1555 };
1556
1557 const struct cpu_cost_table cortexa15_extra_costs =
1558 {
1559 /* ALU */
1560 {
1561 0, /* arith. */
1562 0, /* logical. */
1563 0, /* shift. */
1564 0, /* shift_reg. */
1565 COSTS_N_INSNS (1), /* arith_shift. */
1566 COSTS_N_INSNS (1), /* arith_shift_reg. */
1567 COSTS_N_INSNS (1), /* log_shift. */
1568 COSTS_N_INSNS (1), /* log_shift_reg. */
1569 0, /* extend. */
1570 COSTS_N_INSNS (1), /* extend_arith. */
1571 COSTS_N_INSNS (1), /* bfi. */
1572 0, /* bfx. */
1573 0, /* clz. */
1574 0, /* rev. */
1575 0, /* non_exec. */
1576 true /* non_exec_costs_exec. */
1577 },
1578 /* MULT SImode */
1579 {
1580 {
1581 COSTS_N_INSNS (2), /* simple. */
1582 COSTS_N_INSNS (3), /* flag_setting. */
1583 COSTS_N_INSNS (2), /* extend. */
1584 COSTS_N_INSNS (2), /* add. */
1585 COSTS_N_INSNS (2), /* extend_add. */
1586 COSTS_N_INSNS (18) /* idiv. */
1587 },
1588 /* MULT DImode */
1589 {
1590 0, /* simple (N/A). */
1591 0, /* flag_setting (N/A). */
1592 COSTS_N_INSNS (3), /* extend. */
1593 0, /* add (N/A). */
1594 COSTS_N_INSNS (3), /* extend_add. */
1595 0 /* idiv (N/A). */
1596 }
1597 },
1598 /* LD/ST */
1599 {
1600 COSTS_N_INSNS (3), /* load. */
1601 COSTS_N_INSNS (3), /* load_sign_extend. */
1602 COSTS_N_INSNS (3), /* ldrd. */
1603 COSTS_N_INSNS (4), /* ldm_1st. */
1604 1, /* ldm_regs_per_insn_1st. */
1605 2, /* ldm_regs_per_insn_subsequent. */
1606 COSTS_N_INSNS (4), /* loadf. */
1607 COSTS_N_INSNS (4), /* loadd. */
1608 0, /* load_unaligned. */
1609 0, /* store. */
1610 0, /* strd. */
1611 COSTS_N_INSNS (1), /* stm_1st. */
1612 1, /* stm_regs_per_insn_1st. */
1613 2, /* stm_regs_per_insn_subsequent. */
1614 0, /* storef. */
1615 0, /* stored. */
1616 0, /* store_unaligned. */
1617 COSTS_N_INSNS (1), /* loadv. */
1618 COSTS_N_INSNS (1) /* storev. */
1619 },
1620 {
1621 /* FP SFmode */
1622 {
1623 COSTS_N_INSNS (17), /* div. */
1624 COSTS_N_INSNS (4), /* mult. */
1625 COSTS_N_INSNS (8), /* mult_addsub. */
1626 COSTS_N_INSNS (8), /* fma. */
1627 COSTS_N_INSNS (4), /* addsub. */
1628 COSTS_N_INSNS (2), /* fpconst. */
1629 COSTS_N_INSNS (2), /* neg. */
1630 COSTS_N_INSNS (5), /* compare. */
1631 COSTS_N_INSNS (4), /* widen. */
1632 COSTS_N_INSNS (4), /* narrow. */
1633 COSTS_N_INSNS (4), /* toint. */
1634 COSTS_N_INSNS (4), /* fromint. */
1635 COSTS_N_INSNS (4) /* roundint. */
1636 },
1637 /* FP DFmode */
1638 {
1639 COSTS_N_INSNS (31), /* div. */
1640 COSTS_N_INSNS (4), /* mult. */
1641 COSTS_N_INSNS (8), /* mult_addsub. */
1642 COSTS_N_INSNS (8), /* fma. */
1643 COSTS_N_INSNS (4), /* addsub. */
1644 COSTS_N_INSNS (2), /* fpconst. */
1645 COSTS_N_INSNS (2), /* neg. */
1646 COSTS_N_INSNS (2), /* compare. */
1647 COSTS_N_INSNS (4), /* widen. */
1648 COSTS_N_INSNS (4), /* narrow. */
1649 COSTS_N_INSNS (4), /* toint. */
1650 COSTS_N_INSNS (4), /* fromint. */
1651 COSTS_N_INSNS (4) /* roundint. */
1652 }
1653 },
1654 /* Vector */
1655 {
1656 COSTS_N_INSNS (1) /* alu. */
1657 }
1658 };
1659
1660 const struct cpu_cost_table v7m_extra_costs =
1661 {
1662 /* ALU */
1663 {
1664 0, /* arith. */
1665 0, /* logical. */
1666 0, /* shift. */
1667 0, /* shift_reg. */
1668 0, /* arith_shift. */
1669 COSTS_N_INSNS (1), /* arith_shift_reg. */
1670 0, /* log_shift. */
1671 COSTS_N_INSNS (1), /* log_shift_reg. */
1672 0, /* extend. */
1673 COSTS_N_INSNS (1), /* extend_arith. */
1674 0, /* bfi. */
1675 0, /* bfx. */
1676 0, /* clz. */
1677 0, /* rev. */
1678 COSTS_N_INSNS (1), /* non_exec. */
1679 false /* non_exec_costs_exec. */
1680 },
1681 {
1682 /* MULT SImode */
1683 {
1684 COSTS_N_INSNS (1), /* simple. */
1685 COSTS_N_INSNS (1), /* flag_setting. */
1686 COSTS_N_INSNS (2), /* extend. */
1687 COSTS_N_INSNS (1), /* add. */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 COSTS_N_INSNS (8) /* idiv. */
1690 },
1691 /* MULT DImode */
1692 {
1693 0, /* simple (N/A). */
1694 0, /* flag_setting (N/A). */
1695 COSTS_N_INSNS (2), /* extend. */
1696 0, /* add (N/A). */
1697 COSTS_N_INSNS (3), /* extend_add. */
1698 0 /* idiv (N/A). */
1699 }
1700 },
1701 /* LD/ST */
1702 {
1703 COSTS_N_INSNS (2), /* load. */
1704 0, /* load_sign_extend. */
1705 COSTS_N_INSNS (3), /* ldrd. */
1706 COSTS_N_INSNS (2), /* ldm_1st. */
1707 1, /* ldm_regs_per_insn_1st. */
1708 1, /* ldm_regs_per_insn_subsequent. */
1709 COSTS_N_INSNS (2), /* loadf. */
1710 COSTS_N_INSNS (3), /* loadd. */
1711 COSTS_N_INSNS (1), /* load_unaligned. */
1712 COSTS_N_INSNS (2), /* store. */
1713 COSTS_N_INSNS (3), /* strd. */
1714 COSTS_N_INSNS (2), /* stm_1st. */
1715 1, /* stm_regs_per_insn_1st. */
1716 1, /* stm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* storef. */
1718 COSTS_N_INSNS (3), /* stored. */
1719 COSTS_N_INSNS (1), /* store_unaligned. */
1720 COSTS_N_INSNS (1), /* loadv. */
1721 COSTS_N_INSNS (1) /* storev. */
1722 },
1723 {
1724 /* FP SFmode */
1725 {
1726 COSTS_N_INSNS (7), /* div. */
1727 COSTS_N_INSNS (2), /* mult. */
1728 COSTS_N_INSNS (5), /* mult_addsub. */
1729 COSTS_N_INSNS (3), /* fma. */
1730 COSTS_N_INSNS (1), /* addsub. */
1731 0, /* fpconst. */
1732 0, /* neg. */
1733 0, /* compare. */
1734 0, /* widen. */
1735 0, /* narrow. */
1736 0, /* toint. */
1737 0, /* fromint. */
1738 0 /* roundint. */
1739 },
1740 /* FP DFmode */
1741 {
1742 COSTS_N_INSNS (15), /* div. */
1743 COSTS_N_INSNS (5), /* mult. */
1744 COSTS_N_INSNS (7), /* mult_addsub. */
1745 COSTS_N_INSNS (7), /* fma. */
1746 COSTS_N_INSNS (3), /* addsub. */
1747 0, /* fpconst. */
1748 0, /* neg. */
1749 0, /* compare. */
1750 0, /* widen. */
1751 0, /* narrow. */
1752 0, /* toint. */
1753 0, /* fromint. */
1754 0 /* roundint. */
1755 }
1756 },
1757 /* Vector */
1758 {
1759 COSTS_N_INSNS (1) /* alu. */
1760 }
1761 };
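/* Note (illustrative, assuming the usual pattern in the arm rtx-cost hooks):
   the tables above hold *extra* costs that are added on top of a base cost of
   COSTS_N_INSNS (1), so with v7m_extra_costs a SImode multiply-accumulate is
   costed roughly as COSTS_N_INSNS (1) + COSTS_N_INSNS (1), while entries of 0
   (e.g. simple ALU operations) leave only the base cost.  */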
1762
1763 const struct tune_params arm_slowmul_tune =
1764 {
1765 &generic_extra_costs, /* Insn extra costs. */
1766 NULL, /* Sched adj cost. */
1767 arm_default_branch_cost,
1768 &arm_default_vec_cost,
1769 3, /* Constant limit. */
1770 5, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 tune_params::PREF_CONST_POOL_TRUE,
1775 tune_params::PREF_LDRD_FALSE,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER,
1779 tune_params::PREF_NEON_64_FALSE,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE,
1781 tune_params::FUSE_NOTHING,
1782 tune_params::SCHED_AUTOPREF_OFF
1783 };
1784
1785 const struct tune_params arm_fastmul_tune =
1786 {
1787 &generic_extra_costs, /* Insn extra costs. */
1788 NULL, /* Sched adj cost. */
1789 arm_default_branch_cost,
1790 &arm_default_vec_cost,
1791 1, /* Constant limit. */
1792 5, /* Max cond insns. */
1793 8, /* Memset max inline. */
1794 1, /* Issue rate. */
1795 ARM_PREFETCH_NOT_BENEFICIAL,
1796 tune_params::PREF_CONST_POOL_TRUE,
1797 tune_params::PREF_LDRD_FALSE,
1798 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1800 tune_params::DISPARAGE_FLAGS_NEITHER,
1801 tune_params::PREF_NEON_64_FALSE,
1802 tune_params::PREF_NEON_STRINGOPS_FALSE,
1803 tune_params::FUSE_NOTHING,
1804 tune_params::SCHED_AUTOPREF_OFF
1805 };
1806
1807 /* StrongARM has early execution of branches, so a sequence that is worth
1808 skipping is shorter. Set max_insns_skipped to a lower value. */
1809
1810 const struct tune_params arm_strongarm_tune =
1811 {
1812 &generic_extra_costs, /* Insn extra costs. */
1813 NULL, /* Sched adj cost. */
1814 arm_default_branch_cost,
1815 &arm_default_vec_cost,
1816 1, /* Constant limit. */
1817 3, /* Max cond insns. */
1818 8, /* Memset max inline. */
1819 1, /* Issue rate. */
1820 ARM_PREFETCH_NOT_BENEFICIAL,
1821 tune_params::PREF_CONST_POOL_TRUE,
1822 tune_params::PREF_LDRD_FALSE,
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1825 tune_params::DISPARAGE_FLAGS_NEITHER,
1826 tune_params::PREF_NEON_64_FALSE,
1827 tune_params::PREF_NEON_STRINGOPS_FALSE,
1828 tune_params::FUSE_NOTHING,
1829 tune_params::SCHED_AUTOPREF_OFF
1830 };
1831
1832 const struct tune_params arm_xscale_tune =
1833 {
1834 &generic_extra_costs, /* Insn extra costs. */
1835 xscale_sched_adjust_cost,
1836 arm_default_branch_cost,
1837 &arm_default_vec_cost,
1838 2, /* Constant limit. */
1839 3, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 tune_params::PREF_CONST_POOL_TRUE,
1844 tune_params::PREF_LDRD_FALSE,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER,
1848 tune_params::PREF_NEON_64_FALSE,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE,
1850 tune_params::FUSE_NOTHING,
1851 tune_params::SCHED_AUTOPREF_OFF
1852 };
1853
1854 const struct tune_params arm_9e_tune =
1855 {
1856 &generic_extra_costs, /* Insn extra costs. */
1857 NULL, /* Sched adj cost. */
1858 arm_default_branch_cost,
1859 &arm_default_vec_cost,
1860 1, /* Constant limit. */
1861 5, /* Max cond insns. */
1862 8, /* Memset max inline. */
1863 1, /* Issue rate. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 tune_params::PREF_CONST_POOL_TRUE,
1866 tune_params::PREF_LDRD_FALSE,
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1868 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1869 tune_params::DISPARAGE_FLAGS_NEITHER,
1870 tune_params::PREF_NEON_64_FALSE,
1871 tune_params::PREF_NEON_STRINGOPS_FALSE,
1872 tune_params::FUSE_NOTHING,
1873 tune_params::SCHED_AUTOPREF_OFF
1874 };
1875
1876 const struct tune_params arm_marvell_pj4_tune =
1877 {
1878 &generic_extra_costs, /* Insn extra costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 1, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 2, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_64_FALSE,
1893 tune_params::PREF_NEON_STRINGOPS_FALSE,
1894 tune_params::FUSE_NOTHING,
1895 tune_params::SCHED_AUTOPREF_OFF
1896 };
1897
1898 const struct tune_params arm_v6t2_tune =
1899 {
1900 &generic_extra_costs, /* Insn extra costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_FALSE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_64_FALSE,
1915 tune_params::PREF_NEON_STRINGOPS_FALSE,
1916 tune_params::FUSE_NOTHING,
1917 tune_params::SCHED_AUTOPREF_OFF
1918 };
1919
1920
1921 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1922 const struct tune_params arm_cortex_tune =
1923 {
1924 &generic_extra_costs,
1925 NULL, /* Sched adj cost. */
1926 arm_default_branch_cost,
1927 &arm_default_vec_cost,
1928 1, /* Constant limit. */
1929 5, /* Max cond insns. */
1930 8, /* Memset max inline. */
1931 2, /* Issue rate. */
1932 ARM_PREFETCH_NOT_BENEFICIAL,
1933 tune_params::PREF_CONST_POOL_FALSE,
1934 tune_params::PREF_LDRD_FALSE,
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1937 tune_params::DISPARAGE_FLAGS_NEITHER,
1938 tune_params::PREF_NEON_64_FALSE,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_cortex_a8_tune =
1945 {
1946 &cortexa8_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 arm_default_branch_cost,
1949 &arm_default_vec_cost,
1950 1, /* Constant limit. */
1951 5, /* Max cond insns. */
1952 8, /* Memset max inline. */
1953 2, /* Issue rate. */
1954 ARM_PREFETCH_NOT_BENEFICIAL,
1955 tune_params::PREF_CONST_POOL_FALSE,
1956 tune_params::PREF_LDRD_FALSE,
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1959 tune_params::DISPARAGE_FLAGS_NEITHER,
1960 tune_params::PREF_NEON_64_FALSE,
1961 tune_params::PREF_NEON_STRINGOPS_TRUE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_cortex_a7_tune =
1967 {
1968 &cortexa7_extra_costs,
1969 NULL, /* Sched adj cost. */
1970 arm_default_branch_cost,
1971 &arm_default_vec_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 8, /* Memset max inline. */
1975 2, /* Issue rate. */
1976 ARM_PREFETCH_NOT_BENEFICIAL,
1977 tune_params::PREF_CONST_POOL_FALSE,
1978 tune_params::PREF_LDRD_FALSE,
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1981 tune_params::DISPARAGE_FLAGS_NEITHER,
1982 tune_params::PREF_NEON_64_FALSE,
1983 tune_params::PREF_NEON_STRINGOPS_TRUE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_cortex_a15_tune =
1989 {
1990 &cortexa15_extra_costs,
1991 NULL, /* Sched adj cost. */
1992 arm_default_branch_cost,
1993 &arm_default_vec_cost,
1994 1, /* Constant limit. */
1995 2, /* Max cond insns. */
1996 8, /* Memset max inline. */
1997 3, /* Issue rate. */
1998 ARM_PREFETCH_NOT_BENEFICIAL,
1999 tune_params::PREF_CONST_POOL_FALSE,
2000 tune_params::PREF_LDRD_TRUE,
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2003 tune_params::DISPARAGE_FLAGS_ALL,
2004 tune_params::PREF_NEON_64_FALSE,
2005 tune_params::PREF_NEON_STRINGOPS_TRUE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_FULL
2008 };
2009
2010 const struct tune_params arm_cortex_a35_tune =
2011 {
2012 &cortexa53_extra_costs,
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 1, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a53_tune =
2033 {
2034 &cortexa53_extra_costs,
2035 NULL, /* Sched adj cost. */
2036 arm_default_branch_cost,
2037 &arm_default_vec_cost,
2038 1, /* Constant limit. */
2039 5, /* Max cond insns. */
2040 8, /* Memset max inline. */
2041 2, /* Issue rate. */
2042 ARM_PREFETCH_NOT_BENEFICIAL,
2043 tune_params::PREF_CONST_POOL_FALSE,
2044 tune_params::PREF_LDRD_FALSE,
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2047 tune_params::DISPARAGE_FLAGS_NEITHER,
2048 tune_params::PREF_NEON_64_FALSE,
2049 tune_params::PREF_NEON_STRINGOPS_TRUE,
2050 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2051 tune_params::SCHED_AUTOPREF_OFF
2052 };
2053
2054 const struct tune_params arm_cortex_a57_tune =
2055 {
2056 &cortexa57_extra_costs,
2057 NULL, /* Sched adj cost. */
2058 arm_default_branch_cost,
2059 &arm_default_vec_cost,
2060 1, /* Constant limit. */
2061 2, /* Max cond insns. */
2062 8, /* Memset max inline. */
2063 3, /* Issue rate. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 tune_params::PREF_CONST_POOL_FALSE,
2066 tune_params::PREF_LDRD_TRUE,
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2069 tune_params::DISPARAGE_FLAGS_ALL,
2070 tune_params::PREF_NEON_64_FALSE,
2071 tune_params::PREF_NEON_STRINGOPS_TRUE,
2072 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2073 tune_params::SCHED_AUTOPREF_FULL
2074 };
2075
2076 const struct tune_params arm_exynosm1_tune =
2077 {
2078 &exynosm1_extra_costs,
2079 NULL, /* Sched adj cost. */
2080 arm_default_branch_cost,
2081 &arm_default_vec_cost,
2082 1, /* Constant limit. */
2083 2, /* Max cond insns. */
2084 8, /* Memset max inline. */
2085 3, /* Issue rate. */
2086 ARM_PREFETCH_NOT_BENEFICIAL,
2087 tune_params::PREF_CONST_POOL_FALSE,
2088 tune_params::PREF_LDRD_TRUE,
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2090 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2091 tune_params::DISPARAGE_FLAGS_ALL,
2092 tune_params::PREF_NEON_64_FALSE,
2093 tune_params::PREF_NEON_STRINGOPS_TRUE,
2094 tune_params::FUSE_NOTHING,
2095 tune_params::SCHED_AUTOPREF_OFF
2096 };
2097
2098 const struct tune_params arm_xgene1_tune =
2099 {
2100 &xgene1_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 32, /* Memset max inline. */
2107 4, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_64_FALSE,
2115 tune_params::PREF_NEON_STRINGOPS_FALSE,
2116 tune_params::FUSE_NOTHING,
2117 tune_params::SCHED_AUTOPREF_OFF
2118 };
2119
2120 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2121 less appealing. Set max_insns_skipped to a low value. */
2122
2123 const struct tune_params arm_cortex_a5_tune =
2124 {
2125 &cortexa5_extra_costs,
2126 NULL, /* Sched adj cost. */
2127 arm_cortex_a5_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 1, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 2, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_64_FALSE,
2140 tune_params::PREF_NEON_STRINGOPS_TRUE,
2141 tune_params::FUSE_NOTHING,
2142 tune_params::SCHED_AUTOPREF_OFF
2143 };
2144
2145 const struct tune_params arm_cortex_a9_tune =
2146 {
2147 &cortexa9_extra_costs,
2148 cortex_a9_sched_adjust_cost,
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_BENEFICIAL(4,32,32),
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_64_FALSE,
2162 tune_params::PREF_NEON_STRINGOPS_FALSE,
2163 tune_params::FUSE_NOTHING,
2164 tune_params::SCHED_AUTOPREF_OFF
2165 };
2166
2167 const struct tune_params arm_cortex_a12_tune =
2168 {
2169 &cortexa12_extra_costs,
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost, /* Vectorizer costs. */
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 2, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_64_FALSE,
2184 tune_params::PREF_NEON_STRINGOPS_TRUE,
2185 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2186 tune_params::SCHED_AUTOPREF_OFF
2187 };
2188
2189 const struct tune_params arm_cortex_a73_tune =
2190 {
2191 &cortexa57_extra_costs,
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost, /* Vectorizer costs. */
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 2, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_64_FALSE,
2206 tune_params::PREF_NEON_STRINGOPS_TRUE,
2207 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2208 tune_params::SCHED_AUTOPREF_FULL
2209 };
2210
2211 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2212 single cycle, so materialising a 32-bit constant with a MOVW/MOVT pair costs
2213 two cycles. An LDR from the constant pool also takes two cycles, but mildly
2214 increases pipelining opportunity (consecutive loads/stores can be pipelined
2215 together, saving one cycle) and may also improve icache utilisation. Hence
2216 we prefer the constant pool for such processors. */
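/* Illustrative sketch (not from the original comment) of the two sequences
   being compared for a 32-bit constant X:

       movw    r0, #:lower16:X     @ 1 cycle
       movt    r0, #:upper16:X     @ 1 cycle

   versus

       ldr     r0, .Lpool_entry    @ 2 cycles, but can pipeline with an
                                   @ adjacent load/store

   which is why PREF_CONST_POOL_TRUE is chosen below.  */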
2217
2218 const struct tune_params arm_v7m_tune =
2219 {
2220 &v7m_extra_costs,
2221 NULL, /* Sched adj cost. */
2222 arm_cortex_m_branch_cost,
2223 &arm_default_vec_cost,
2224 1, /* Constant limit. */
2225 2, /* Max cond insns. */
2226 8, /* Memset max inline. */
2227 1, /* Issue rate. */
2228 ARM_PREFETCH_NOT_BENEFICIAL,
2229 tune_params::PREF_CONST_POOL_TRUE,
2230 tune_params::PREF_LDRD_FALSE,
2231 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2233 tune_params::DISPARAGE_FLAGS_NEITHER,
2234 tune_params::PREF_NEON_64_FALSE,
2235 tune_params::PREF_NEON_STRINGOPS_FALSE,
2236 tune_params::FUSE_NOTHING,
2237 tune_params::SCHED_AUTOPREF_OFF
2238 };
2239
2240 /* Cortex-M7 tuning. */
2241
2242 const struct tune_params arm_cortex_m7_tune =
2243 {
2244 &v7m_extra_costs,
2245 NULL, /* Sched adj cost. */
2246 arm_cortex_m7_branch_cost,
2247 &arm_default_vec_cost,
2248 0, /* Constant limit. */
2249 1, /* Max cond insns. */
2250 8, /* Memset max inline. */
2251 2, /* Issue rate. */
2252 ARM_PREFETCH_NOT_BENEFICIAL,
2253 tune_params::PREF_CONST_POOL_TRUE,
2254 tune_params::PREF_LDRD_FALSE,
2255 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2257 tune_params::DISPARAGE_FLAGS_NEITHER,
2258 tune_params::PREF_NEON_64_FALSE,
2259 tune_params::PREF_NEON_STRINGOPS_FALSE,
2260 tune_params::FUSE_NOTHING,
2261 tune_params::SCHED_AUTOPREF_OFF
2262 };
2263
2264 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2265 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2266 cortex-m23. */
2267 const struct tune_params arm_v6m_tune =
2268 {
2269 &generic_extra_costs, /* Insn extra costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_default_branch_cost,
2272 &arm_default_vec_cost, /* Vectorizer costs. */
2273 1, /* Constant limit. */
2274 5, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_FALSE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 const struct tune_params arm_fa726te_tune =
2290 {
2291 &generic_extra_costs, /* Insn extra costs. */
2292 fa726te_sched_adjust_cost,
2293 arm_default_branch_cost,
2294 &arm_default_vec_cost,
2295 1, /* Constant limit. */
2296 5, /* Max cond insns. */
2297 8, /* Memset max inline. */
2298 2, /* Issue rate. */
2299 ARM_PREFETCH_NOT_BENEFICIAL,
2300 tune_params::PREF_CONST_POOL_TRUE,
2301 tune_params::PREF_LDRD_FALSE,
2302 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2303 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2304 tune_params::DISPARAGE_FLAGS_NEITHER,
2305 tune_params::PREF_NEON_64_FALSE,
2306 tune_params::PREF_NEON_STRINGOPS_FALSE,
2307 tune_params::FUSE_NOTHING,
2308 tune_params::SCHED_AUTOPREF_OFF
2309 };
2310
2311 /* Auto-generated CPU, FPU and architecture tables. */
2312 #include "arm-cpu-data.h"
2313
2314 /* The name of the preprocessor macro to define for this architecture. The
2315 PROFILE placeholder is replaced by the architecture name (e.g. 8A) in
2316 arm_option_override () and is sized to hold the longest architecture name. */
2317
2318 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2319
2320 /* Supported TLS relocations. */
2321
2322 enum tls_reloc {
2323 TLS_GD32,
2324 TLS_LDM32,
2325 TLS_LDO32,
2326 TLS_IE32,
2327 TLS_LE32,
2328 TLS_DESCSEQ /* GNU scheme */
2329 };
2330
2331 /* The maximum number of insns to be used when loading a constant. */
2332 inline static int
2333 arm_constant_limit (bool size_p)
2334 {
2335 return size_p ? 1 : current_tune->constant_limit;
2336 }
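/* For instance (illustrative): with -Os this always returns 1, while a core
   using arm_slowmul_tune gets its constant_limit of 3 from the table above,
   i.e. roughly how many insns may be spent synthesising a constant inline
   before some other strategy is preferred.  */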
2337
2338 /* Emit an insn that's a simple single-set. Both operands must be known
2339 to be valid. */
2340 inline static rtx_insn *
2341 emit_set_insn (rtx x, rtx y)
2342 {
2343 return emit_insn (gen_rtx_SET (x, y));
2344 }
2345
2346 /* Return the number of bits set in VALUE. */
2347 static unsigned
2348 bit_count (unsigned long value)
2349 {
2350 unsigned long count = 0;
2351
2352 while (value)
2353 {
2354 count++;
2355 value &= value - 1; /* Clear the least-significant set bit. */
2356 }
2357
2358 return count;
2359 }
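/* Worked example (illustrative): value = 0b101100
     iteration 1: 0b101100 & 0b101011 = 0b101000
     iteration 2: 0b101000 & 0b100111 = 0b100000
     iteration 3: 0b100000 & 0b011111 = 0b000000
   giving a count of 3, i.e. one iteration per set bit.  */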
2360
2361 /* Return the number of bits set in BMAP. */
2362 static unsigned
2363 bitmap_popcount (const sbitmap bmap)
2364 {
2365 unsigned int count = 0;
2366 unsigned int n = 0;
2367 sbitmap_iterator sbi;
2368
2369 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2370 count++;
2371 return count;
2372 }
2373
2374 typedef struct
2375 {
2376 machine_mode mode;
2377 const char *name;
2378 } arm_fixed_mode_set;
2379
2380 /* A small helper for registering the fixed-point optab libfuncs. */
2381
2382 static void
2383 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2384 const char *funcname, const char *modename,
2385 int num_suffix)
2386 {
2387 char buffer[50];
2388
2389 if (num_suffix == 0)
2390 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2391 else
2392 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2393
2394 set_optab_libfunc (optable, mode, buffer);
2395 }
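/* For example (illustrative, using combinations that appear in the loops
   below): this registers names such as

       __gnu_addqq3    for add_optab in QQmode,
       __gnu_ssaddha3  for ssadd_optab in HAmode,
       __gnu_negsa2    for neg_optab in SAmode,

   which are expected to match the __gnu_-prefixed fixed-point routines
   provided by libgcc.  */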
2396
2397 static void
2398 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2399 machine_mode from, const char *funcname,
2400 const char *toname, const char *fromname)
2401 {
2402 char buffer[50];
2403 const char *maybe_suffix_2 = "";
2404
2405 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2406 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2407 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2408 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2409 maybe_suffix_2 = "2";
2410
2411 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2412 maybe_suffix_2);
2413
2414 set_conv_libfunc (optable, to, from, buffer);
2415 }
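/* For example (illustrative): a QQmode to HQmode conversion (both signed
   fractional modes) takes the "2" suffix and is registered as
   "__gnu_fractqqhq2", whereas QQmode to SImode becomes "__gnu_fractqqsi"
   because the destination is not a fixed-point mode.  */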
2416
2417 /* Set up library functions unique to ARM. */
2418
2419 static void
2420 arm_init_libfuncs (void)
2421 {
2422 /* For Linux, we have access to kernel support for atomic operations. */
2423 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2424 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2425
2426 /* There are no special library functions unless we are using the
2427 ARM BPABI. */
2428 if (!TARGET_BPABI)
2429 return;
2430
2431 /* The functions below are described in Section 4 of the "Run-Time
2432 ABI for the ARM architecture", Version 1.0. */
2433
2434 /* Double-precision floating-point arithmetic. Table 2. */
2435 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2436 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2437 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2438 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2439 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
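/* Illustration (a sketch, not part of the run-time ABI text): with the entry
   above, a soft-float double addition such as

       double add_d (double a, double b) { return a + b; }

   is expanded as a call to __aeabi_dadd instead of libgcc's default
   __adddf3.  */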
2440
2441 /* Double-precision comparisons. Table 3. */
2442 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2443 set_optab_libfunc (ne_optab, DFmode, NULL);
2444 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2445 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2446 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2447 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2448 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2449
2450 /* Single-precision floating-point arithmetic. Table 4. */
2451 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2452 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2453 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2454 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2455 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2456
2457 /* Single-precision comparisons. Table 5. */
2458 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2459 set_optab_libfunc (ne_optab, SFmode, NULL);
2460 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2461 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2462 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2463 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2464 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2465
2466 /* Floating-point to integer conversions. Table 6. */
2467 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2468 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2469 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2470 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2471 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2472 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2473 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2474 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2475
2476 /* Conversions between floating types. Table 7. */
2477 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2478 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2479
2480 /* Integer to floating-point conversions. Table 8. */
2481 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2482 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2483 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2484 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2485 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2486 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2487 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2488 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2489
2490 /* Long long. Table 9. */
2491 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2492 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2493 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2494 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2495 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2496 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2497 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2498 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2499
2500 /* Integer (32/32->32) division. \S 4.3.1. */
2501 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2502 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2503
2504 /* The divmod functions are designed so that they can be used for
2505 plain division, even though they return both the quotient and the
2506 remainder. The quotient is returned in the usual location (i.e.,
2507 r0 for SImode, {r0, r1} for DImode), just as would be expected
2508 for an ordinary division routine. Because the AAPCS calling
2509 conventions specify that all of { r0, r1, r2, r3 } are
2510 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2511 explicitly that those registers are clobbered by these
2512 routines. */
2513 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2514 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
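/* Illustrative sketch of the AEABI convention relied on above: for SImode,
   __aeabi_idivmod returns the quotient in r0 and the remainder in r1, so
   using it for a plain division simply ignores r1; for DImode,
   __aeabi_ldivmod returns the quotient in {r0, r1} and the remainder in
   {r2, r3}.  */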
2515
2516 /* For SImode division the ABI provides div-without-mod routines,
2517 which are faster. */
2518 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2519 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2520
2521 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2522 divmod libcalls instead. */
2523 set_optab_libfunc (smod_optab, DImode, NULL);
2524 set_optab_libfunc (umod_optab, DImode, NULL);
2525 set_optab_libfunc (smod_optab, SImode, NULL);
2526 set_optab_libfunc (umod_optab, SImode, NULL);
2527
2528 /* Half-precision float operations. The compiler handles all operations
2529 with NULL libfuncs by converting to SFmode. */
2530 switch (arm_fp16_format)
2531 {
2532 case ARM_FP16_FORMAT_IEEE:
2533 case ARM_FP16_FORMAT_ALTERNATIVE:
2534
2535 /* Conversions. */
2536 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_f2h_ieee"
2539 : "__gnu_f2h_alternative"));
2540 set_conv_libfunc (sext_optab, SFmode, HFmode,
2541 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2542 ? "__gnu_h2f_ieee"
2543 : "__gnu_h2f_alternative"));
2544
2545 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2546 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2547 ? "__gnu_d2h_ieee"
2548 : "__gnu_d2h_alternative"));
2549
2550 /* Arithmetic. */
2551 set_optab_libfunc (add_optab, HFmode, NULL);
2552 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2553 set_optab_libfunc (smul_optab, HFmode, NULL);
2554 set_optab_libfunc (neg_optab, HFmode, NULL);
2555 set_optab_libfunc (sub_optab, HFmode, NULL);
2556
2557 /* Comparisons. */
2558 set_optab_libfunc (eq_optab, HFmode, NULL);
2559 set_optab_libfunc (ne_optab, HFmode, NULL);
2560 set_optab_libfunc (lt_optab, HFmode, NULL);
2561 set_optab_libfunc (le_optab, HFmode, NULL);
2562 set_optab_libfunc (ge_optab, HFmode, NULL);
2563 set_optab_libfunc (gt_optab, HFmode, NULL);
2564 set_optab_libfunc (unord_optab, HFmode, NULL);
2565 break;
2566
2567 default:
2568 break;
2569 }
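/* Illustrative consequence of the NULL entries above: an __fp16 addition is
   performed by widening both operands to SFmode (via __gnu_h2f_ieee /
   __gnu_h2f_alternative, or hardware conversions where available), adding in
   SFmode, and narrowing the result back with the matching __gnu_f2h_*
   routine.  */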
2570
2571 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2572 {
2573 const arm_fixed_mode_set fixed_arith_modes[] =
2574 {
2575 { E_QQmode, "qq" },
2576 { E_UQQmode, "uqq" },
2577 { E_HQmode, "hq" },
2578 { E_UHQmode, "uhq" },
2579 { E_SQmode, "sq" },
2580 { E_USQmode, "usq" },
2581 { E_DQmode, "dq" },
2582 { E_UDQmode, "udq" },
2583 { E_TQmode, "tq" },
2584 { E_UTQmode, "utq" },
2585 { E_HAmode, "ha" },
2586 { E_UHAmode, "uha" },
2587 { E_SAmode, "sa" },
2588 { E_USAmode, "usa" },
2589 { E_DAmode, "da" },
2590 { E_UDAmode, "uda" },
2591 { E_TAmode, "ta" },
2592 { E_UTAmode, "uta" }
2593 };
2594 const arm_fixed_mode_set fixed_conv_modes[] =
2595 {
2596 { E_QQmode, "qq" },
2597 { E_UQQmode, "uqq" },
2598 { E_HQmode, "hq" },
2599 { E_UHQmode, "uhq" },
2600 { E_SQmode, "sq" },
2601 { E_USQmode, "usq" },
2602 { E_DQmode, "dq" },
2603 { E_UDQmode, "udq" },
2604 { E_TQmode, "tq" },
2605 { E_UTQmode, "utq" },
2606 { E_HAmode, "ha" },
2607 { E_UHAmode, "uha" },
2608 { E_SAmode, "sa" },
2609 { E_USAmode, "usa" },
2610 { E_DAmode, "da" },
2611 { E_UDAmode, "uda" },
2612 { E_TAmode, "ta" },
2613 { E_UTAmode, "uta" },
2614 { E_QImode, "qi" },
2615 { E_HImode, "hi" },
2616 { E_SImode, "si" },
2617 { E_DImode, "di" },
2618 { E_TImode, "ti" },
2619 { E_SFmode, "sf" },
2620 { E_DFmode, "df" }
2621 };
2622 unsigned int i, j;
2623
2624 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2625 {
2626 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2627 "add", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2629 "ssadd", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2631 "usadd", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2633 "sub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2635 "sssub", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2637 "ussub", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2639 "mul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2641 "ssmul", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2643 "usmul", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2645 "div", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2647 "udiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2649 "ssdiv", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2651 "usdiv", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2653 "neg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2655 "ssneg", fixed_arith_modes[i].name, 2);
2656 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2657 "usneg", fixed_arith_modes[i].name, 2);
2658 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2659 "ashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2661 "ashr", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2663 "lshr", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2665 "ssashl", fixed_arith_modes[i].name, 3);
2666 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2667 "usashl", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2669 "cmp", fixed_arith_modes[i].name, 2);
2670 }
2671
2672 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2673 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2674 {
2675 if (i == j
2676 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2677 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2678 continue;
2679
2680 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "fract",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2684 arm_set_fixed_conv_libfunc (satfract_optab,
2685 fixed_conv_modes[i].mode,
2686 fixed_conv_modes[j].mode, "satfract",
2687 fixed_conv_modes[i].name,
2688 fixed_conv_modes[j].name);
2689 arm_set_fixed_conv_libfunc (fractuns_optab,
2690 fixed_conv_modes[i].mode,
2691 fixed_conv_modes[j].mode, "fractuns",
2692 fixed_conv_modes[i].name,
2693 fixed_conv_modes[j].name);
2694 arm_set_fixed_conv_libfunc (satfractuns_optab,
2695 fixed_conv_modes[i].mode,
2696 fixed_conv_modes[j].mode, "satfractuns",
2697 fixed_conv_modes[i].name,
2698 fixed_conv_modes[j].name);
2699 }
2700 }
2701
2702 if (TARGET_AAPCS_BASED)
2703 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2704 }
2705
2706 /* On AAPCS systems, this is the "struct __va_list". */
2707 static GTY(()) tree va_list_type;
2708
2709 /* Return the type to use as __builtin_va_list. */
2710 static tree
2711 arm_build_builtin_va_list (void)
2712 {
2713 tree va_list_name;
2714 tree ap_field;
2715
2716 if (!TARGET_AAPCS_BASED)
2717 return std_build_builtin_va_list ();
2718
2719 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2720 defined as:
2721
2722 struct __va_list
2723 {
2724 void *__ap;
2725 };
2726
2727 The C Library ABI further reinforces this definition in \S
2728 4.1.
2729
2730 We must follow this definition exactly. The structure tag
2731 name is visible in C++ mangled names, and thus forms a part
2732 of the ABI. The field name may be used by people who
2733 #include <stdarg.h>. */
2734 /* Create the type. */
2735 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2736 /* Give it the required name. */
2737 va_list_name = build_decl (BUILTINS_LOCATION,
2738 TYPE_DECL,
2739 get_identifier ("__va_list"),
2740 va_list_type);
2741 DECL_ARTIFICIAL (va_list_name) = 1;
2742 TYPE_NAME (va_list_type) = va_list_name;
2743 TYPE_STUB_DECL (va_list_type) = va_list_name;
2744 /* Create the __ap field. */
2745 ap_field = build_decl (BUILTINS_LOCATION,
2746 FIELD_DECL,
2747 get_identifier ("__ap"),
2748 ptr_type_node);
2749 DECL_ARTIFICIAL (ap_field) = 1;
2750 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2751 TYPE_FIELDS (va_list_type) = ap_field;
2752 /* Compute its layout. */
2753 layout_type (va_list_type);
2754
2755 return va_list_type;
2756 }
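/* Illustrative example of why the tag matters (assuming the standard AAPCS
   C++ mangling of this type): a C++ function such as

       void f (va_list ap);

   mangles as _Z1fSt9__va_list, so changing the tag or layout here would be
   an ABI break.  */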
2757
2758 /* Return an expression of type "void *" pointing to the next
2759 available argument in a variable-argument list. VALIST is the
2760 user-level va_list object, of type __builtin_va_list. */
2761 static tree
2762 arm_extract_valist_ptr (tree valist)
2763 {
2764 if (TREE_TYPE (valist) == error_mark_node)
2765 return error_mark_node;
2766
2767 /* On an AAPCS target, the pointer is stored within "struct
2768 __va_list". */
2769 if (TARGET_AAPCS_BASED)
2770 {
2771 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2772 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2773 valist, ap_field, NULL_TREE);
2774 }
2775
2776 return valist;
2777 }
2778
2779 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2780 static void
2781 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 std_expand_builtin_va_start (valist, nextarg);
2785 }
2786
2787 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2788 static tree
2789 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2790 gimple_seq *post_p)
2791 {
2792 valist = arm_extract_valist_ptr (valist);
2793 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2794 }
2795
2796 /* Check any incompatible options that the user has specified. */
2797 static void
2798 arm_option_check_internal (struct gcc_options *opts)
2799 {
2800 int flags = opts->x_target_flags;
2801
2802 /* iWMMXt and NEON are incompatible. */
2803 if (TARGET_IWMMXT
2804 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2805 error ("iWMMXt and NEON are incompatible");
2806
2807 /* Make sure that the processor choice does not conflict with any of the
2808 other command line choices. */
2809 if (TARGET_ARM_P (flags)
2810 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2811 error ("target CPU does not support ARM mode");
2812
2813 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2814 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2815 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2816
2817 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2818 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2819
2820 /* If this target is normally configured to use APCS frames, warn if they
2821 are turned off and debugging is turned on. */
2822 if (TARGET_ARM_P (flags)
2823 && write_symbols != NO_DEBUG
2824 && !TARGET_APCS_FRAME
2825 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2826 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2827
2828 /* iWMMXt unsupported under Thumb mode. */
2829 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2830 error ("iWMMXt unsupported under Thumb mode");
2831
2832 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2833 error ("can not use -mtp=cp15 with 16-bit Thumb");
2834
2835 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2836 {
2837 error ("RTP PIC is incompatible with Thumb");
2838 flag_pic = 0;
2839 }
2840
2841 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2842 with MOVT. */
2843 if ((target_pure_code || target_slow_flash_data)
2844 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2845 {
2846 const char *flag = (target_pure_code ? "-mpure-code" :
2847 "-mslow-flash-data");
2848 error ("%s only supports non-pic code on M-profile targets with the "
2849 "MOVT instruction", flag);
2850 }
2851
2852 }
2853
2854 /* Recompute the global settings depending on target attribute options. */
2855
2856 static void
2857 arm_option_params_internal (void)
2858 {
2859 /* If we are not using the default (ARM mode) section anchor offset
2860 ranges, then set the correct ranges now. */
2861 if (TARGET_THUMB1)
2862 {
2863 /* Thumb-1 LDR instructions cannot have negative offsets.
2864 Permissible positive offset ranges are 5-bit (for byte loads),
2865 6-bit (for halfword loads), or 7-bit (for word loads).
2866 Empirical results suggest a 7-bit anchor range gives the best
2867 overall code size. */
2868 targetm.min_anchor_offset = 0;
2869 targetm.max_anchor_offset = 127;
2870 }
2871 else if (TARGET_THUMB2)
2872 {
2873 /* The minimum is set such that the total size of the block
2874 for a particular anchor is 248 + 1 + 4095 bytes, which is
2875 divisible by eight, ensuring natural spacing of anchors. */
2876 targetm.min_anchor_offset = -248;
2877 targetm.max_anchor_offset = 4095;
2878 }
2879 else
2880 {
2881 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2882 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2883 }
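/* Worked illustration of the figures above: the Thumb-1 load immediates are
   5-bit fields scaled by the access size, giving reachable byte offsets of
   0..31 for byte loads, 0..62 for halfword loads and 0..124 for word loads;
   the 0..127 anchor range therefore matches the reach of word loads, which
   is what the empirical results favour.  */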
2884
2885 /* With -Os, allow conditional sequences of up to 4 insns regardless of the tuning default. */
2886 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2887
2888 /* For THUMB2, we limit the conditional sequence to one IT block. */
2889 if (TARGET_THUMB2)
2890 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2891 }
2892
2893 /* True if -mflip-thumb should next add an attribute for the default
2894 mode, false if it should next add an attribute for the opposite mode. */
2895 static GTY(()) bool thumb_flipper;
2896
2897 /* Options after initial target override. */
2898 static GTY(()) tree init_optimize;
2899
2900 static void
2901 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 {
2903 if (opts->x_align_functions <= 0)
2904 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2905 && opts->x_optimize_size ? 2 : 4;
2906 }
2907
2908 /* Implement targetm.override_options_after_change. */
2909
2910 static void
2911 arm_override_options_after_change (void)
2912 {
2913 arm_configure_build_target (&arm_active_target,
2914 TREE_TARGET_OPTION (target_option_default_node),
2915 &global_options_set, false);
2916
2917 arm_override_options_after_change_1 (&global_options);
2918 }
2919
2920 /* Implement TARGET_OPTION_SAVE. */
2921 static void
2922 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2923 {
2924 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2925 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2926 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2927 }
2928
2929 /* Implement TARGET_OPTION_RESTORE. */
2930 static void
2931 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2932 {
2933 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2934 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2935 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2936 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2937 false);
2938 }
2939
2940 /* Reset options between modes that the user has specified. */
2941 static void
2942 arm_option_override_internal (struct gcc_options *opts,
2943 struct gcc_options *opts_set)
2944 {
2945 arm_override_options_after_change_1 (opts);
2946
2947 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 /* The default is to enable interworking, so this warning message would
2950 be confusing to users who have just compiled with, e.g., -march=armv3. */
2951 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2952 opts->x_target_flags &= ~MASK_INTERWORK;
2953 }
2954
2955 if (TARGET_THUMB_P (opts->x_target_flags)
2956 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2957 {
2958 warning (0, "target CPU does not support THUMB instructions");
2959 opts->x_target_flags &= ~MASK_THUMB;
2960 }
2961
2962 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2963 {
2964 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2965 opts->x_target_flags &= ~MASK_APCS_FRAME;
2966 }
2967
2968 /* Callee super interworking implies thumb interworking. Adding
2969 this to the flags here simplifies the logic elsewhere. */
2970 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2971 opts->x_target_flags |= MASK_INTERWORK;
2972
2973 /* We need to remember the initial values so that combinations of options like
2974 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2975 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2976
2977 if (! opts_set->x_arm_restrict_it)
2978 opts->x_arm_restrict_it = arm_arch8;
2979
2980 /* Restricted IT blocks only exist for Thumb-2 code on A/R-profile cores, so force -mrestrict-it off everywhere else. */
2981 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2982 opts->x_arm_restrict_it = 0;
2983
2984 /* Enable -munaligned-access by default for
2985 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
2986 i.e. Thumb2 and ARM state only.
2987 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2988 - ARMv8 architecture-based processors.
2989
2990 Disable -munaligned-access by default for
2991 - all pre-ARMv6 architecture-based processors
2992 - ARMv6-M architecture-based processors
2993 - ARMv8-M Baseline processors. */
2994
2995 if (! opts_set->x_unaligned_access)
2996 {
2997 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2998 && arm_arch6 && (arm_arch_notm || arm_arch7));
2999 }
3000 else if (opts->x_unaligned_access == 1
3001 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3002 {
3003 warning (0, "target CPU does not support unaligned accesses");
3004 opts->x_unaligned_access = 0;
3005 }
3006
3007 /* Don't warn since it's on by default in -O2. */
3008 if (TARGET_THUMB1_P (opts->x_target_flags))
3009 opts->x_flag_schedule_insns = 0;
3010 else
3011 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3012
3013 /* Disable shrink-wrap when optimizing function for size, since it tends to
3014 generate additional returns. */
3015 if (optimize_function_for_size_p (cfun)
3016 && TARGET_THUMB2_P (opts->x_target_flags))
3017 opts->x_flag_shrink_wrap = false;
3018 else
3019 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3020
3021 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3022 - epilogue_insns - does not accurately model the corresponding insns
3023 emitted in the asm file. In particular, see the comment in thumb_exit
3024 'Find out how many of the (return) argument registers we can corrupt'.
3025 As a consequence, the epilogue may clobber registers without fipa-ra
3026 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3027 TODO: Accurately model clobbers for epilogue_insns and reenable
3028 fipa-ra. */
3029 if (TARGET_THUMB1_P (opts->x_target_flags))
3030 opts->x_flag_ipa_ra = 0;
3031 else
3032 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3033
3034 /* Thumb2 inline assembly code should always use unified syntax.
3035 This will apply to ARM and Thumb1 eventually. */
3036 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3037
3038 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3039 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3040 #endif
3041 }
3042
3043 static sbitmap isa_all_fpubits;
3044 static sbitmap isa_quirkbits;
3045
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3049 void
3050 arm_configure_build_target (struct arm_build_target *target,
3051 struct cl_target_option *opts,
3052 struct gcc_options *opts_set,
3053 bool warn_compatible)
3054 {
3055 const cpu_option *arm_selected_tune = NULL;
3056 const arch_option *arm_selected_arch = NULL;
3057 const cpu_option *arm_selected_cpu = NULL;
3058 const arm_fpu_desc *arm_selected_fpu = NULL;
3059 const char *tune_opts = NULL;
3060 const char *arch_opts = NULL;
3061 const char *cpu_opts = NULL;
3062
3063 bitmap_clear (target->isa);
3064 target->core_name = NULL;
3065 target->arch_name = NULL;
3066
3067 if (opts_set->x_arm_arch_string)
3068 {
3069 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3070 "-march",
3071 opts->x_arm_arch_string);
3072 arch_opts = strchr (opts->x_arm_arch_string, '+');
3073 }
3074
3075 if (opts_set->x_arm_cpu_string)
3076 {
3077 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3078 opts->x_arm_cpu_string);
3079 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3080 arm_selected_tune = arm_selected_cpu;
3081 /* If taking the tuning from -mcpu, we don't need to rescan the
3082 options for tuning. */
3083 }
3084
3085 if (opts_set->x_arm_tune_string)
3086 {
3087 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3088 opts->x_arm_tune_string);
3089 tune_opts = strchr (opts->x_arm_tune_string, '+');
3090 }
3091
3092 if (arm_selected_arch)
3093 {
3094 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3095 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3096 arch_opts);
3097
3098 if (arm_selected_cpu)
3099 {
3100 auto_sbitmap cpu_isa (isa_num_bits);
3101 auto_sbitmap isa_delta (isa_num_bits);
3102
3103 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3104 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3105 cpu_opts);
3106 bitmap_xor (isa_delta, cpu_isa, target->isa);
3107 /* Ignore any bits that are quirk bits. */
3108 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3109 /* Ignore (for now) any bits that might be set by -mfpu. */
3110 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3111
3112 if (!bitmap_empty_p (isa_delta))
3113 {
3114 if (warn_compatible)
3115 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3116 arm_selected_cpu->common.name,
3117 arm_selected_arch->common.name);
3118 /* -march wins for code generation.
3119 -mcpu wins for default tuning. */
3120 if (!arm_selected_tune)
3121 arm_selected_tune = arm_selected_cpu;
3122
3123 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3124 target->arch_name = arm_selected_arch->common.name;
3125 }
3126 else
3127 {
3128 /* Architecture and CPU are essentially the same.
3129 Prefer the CPU setting. */
3130 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3131 target->core_name = arm_selected_cpu->common.name;
3132 /* Copy the CPU's capabilities, so that we inherit the
3133 appropriate extensions and quirks. */
3134 bitmap_copy (target->isa, cpu_isa);
3135 }
3136 }
3137 else
3138 {
3139 /* Pick a CPU based on the architecture. */
3140 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3141 target->arch_name = arm_selected_arch->common.name;
3142 /* Note: target->core_name is left unset in this path. */
3143 }
3144 }
3145 else if (arm_selected_cpu)
3146 {
3147 target->core_name = arm_selected_cpu->common.name;
3148 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3150 cpu_opts);
3151 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3152 }
3153 /* If the user did not specify a processor or architecture, choose
3154 one for them. */
3155 else
3156 {
3157 const cpu_option *sel;
3158 auto_sbitmap sought_isa (isa_num_bits);
3159 bitmap_clear (sought_isa);
3160 auto_sbitmap default_isa (isa_num_bits);
3161
3162 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3163 TARGET_CPU_DEFAULT);
3164 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3165 gcc_assert (arm_selected_cpu->common.name);
3166
3167 /* RWE: All of the selection logic below (to the end of this
3168 'if' clause) looks somewhat suspect. It appears to be mostly
3169 there to support forcing thumb support when the default CPU
3170 does not have thumb (somewhat dubious in terms of what the
3171 user might be expecting). I think it should be removed once
3172 support for the pre-thumb era cores is removed. */
3173 sel = arm_selected_cpu;
3174 arm_initialize_isa (default_isa, sel->common.isa_bits);
3175 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3176 cpu_opts);
3177
3178 /* Now check to see if the user has specified any command line
3179 switches that require certain abilities from the cpu. */
3180
3181 if (TARGET_INTERWORK || TARGET_THUMB)
3182 {
3183 bitmap_set_bit (sought_isa, isa_bit_thumb);
3184 bitmap_set_bit (sought_isa, isa_bit_mode32);
3185
3186 /* There are no ARM processors that support both APCS-26 and
3187 interworking. Therefore we forcibly remove MODE26
3188 from the isa features here (if it was set), so that the
3189 search below will always be able to find a compatible
3190 processor. */
3191 bitmap_clear_bit (default_isa, isa_bit_mode26);
3192 }
3193
3194 /* If there are such requirements and the default CPU does not
3195 satisfy them, we need to run over the complete list of
3196 cores looking for one that is satisfactory. */
3197 if (!bitmap_empty_p (sought_isa)
3198 && !bitmap_subset_p (sought_isa, default_isa))
3199 {
3200 auto_sbitmap candidate_isa (isa_num_bits);
3201 /* We're only interested in a CPU with at least the
3202 capabilities of the default CPU and the required
3203 additional features. */
3204 bitmap_ior (default_isa, default_isa, sought_isa);
3205
3206 /* Try to locate a CPU type that supports all of the abilities
3207 of the default CPU, plus the extra abilities requested by
3208 the user. */
3209 for (sel = all_cores; sel->common.name != NULL; sel++)
3210 {
3211 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3212 /* An exact match? */
3213 if (bitmap_equal_p (default_isa, candidate_isa))
3214 break;
3215 }
3216
3217 if (sel->common.name == NULL)
3218 {
3219 unsigned current_bit_count = isa_num_bits;
3220 const cpu_option *best_fit = NULL;
3221
3222 /* Ideally we would like to issue an error message here
3223 saying that it was not possible to find a CPU compatible
3224 with the default CPU, but which also supports the command
3225 line options specified by the programmer, and so they
3226 ought to use the -mcpu=<name> command line option to
3227 override the default CPU type.
3228
3229 If we cannot find a CPU that has exactly the
3230 characteristics of the default CPU and the given
3231 command line options we scan the array again looking
3232 for a best match. The best match must have at least
3233 the capabilities of the perfect match. */
3234 for (sel = all_cores; sel->common.name != NULL; sel++)
3235 {
3236 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3237
3238 if (bitmap_subset_p (default_isa, candidate_isa))
3239 {
3240 unsigned count;
3241
3242 bitmap_and_compl (candidate_isa, candidate_isa,
3243 default_isa);
3244 count = bitmap_popcount (candidate_isa);
3245
3246 if (count < current_bit_count)
3247 {
3248 best_fit = sel;
3249 current_bit_count = count;
3250 }
3251 }
3252
3253 gcc_assert (best_fit);
3254 sel = best_fit;
3255 }
3256 }
3257 arm_selected_cpu = sel;
3258 }
3259
3260 /* Now we know the CPU, we can finally initialize the target
3261 structure. */
3262 target->core_name = arm_selected_cpu->common.name;
3263 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3264 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3265 cpu_opts);
3266 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3267 }
3268
3269 gcc_assert (arm_selected_cpu);
3270 gcc_assert (arm_selected_arch);
3271
3272 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3273 {
3274 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3275 auto_sbitmap fpu_bits (isa_num_bits);
3276
3277 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3278 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3279 bitmap_ior (target->isa, target->isa, fpu_bits);
3280 }
3281
3282 if (!arm_selected_tune)
3283 arm_selected_tune = arm_selected_cpu;
3284 else /* Validate the features passed to -mtune. */
3285 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3286
3287 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3288
3289 /* Finish initializing the target structure. */
3290 target->arch_pp_name = arm_selected_arch->arch;
3291 target->base_arch = arm_selected_arch->base_arch;
3292 target->profile = arm_selected_arch->profile;
3293
3294 target->tune_flags = tune_data->tune_flags;
3295 target->tune = tune_data->tune;
3296 target->tune_core = tune_data->scheduler;
3297 }
3298
3299 /* Fix up any incompatible options that the user has specified. */
3300 static void
3301 arm_option_override (void)
3302 {
3303 static const enum isa_feature fpu_bitlist[]
3304 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3305 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3306 cl_target_option opts;
3307
3308 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3309 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3310
3311 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3313
3314 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3315
3316 if (!global_options_set.x_arm_fpu_index)
3317 {
3318 bool ok;
3319 int fpu_index;
3320
3321 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3322 CL_TARGET);
3323 gcc_assert (ok);
3324 arm_fpu_index = (enum fpu_type) fpu_index;
3325 }
3326
3327 cl_target_option_save (&opts, &global_options);
3328 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3329 true);
3330
3331 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3332 SUBTARGET_OVERRIDE_OPTIONS;
3333 #endif
3334
3335 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3336 arm_base_arch = arm_active_target.base_arch;
3337
3338 arm_tune = arm_active_target.tune_core;
3339 tune_flags = arm_active_target.tune_flags;
3340 current_tune = arm_active_target.tune;
3341
3342 /* TBD: Dwarf info for apcs frame is not handled yet. */
3343 if (TARGET_APCS_FRAME)
3344 flag_shrink_wrap = false;
3345
3346 /* BPABI targets use linker tricks to allow interworking on cores
3347 without thumb support. */
3348 if (TARGET_INTERWORK
3349 && !TARGET_BPABI
3350 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3351 {
3352 warning (0, "target CPU does not support interworking");
3353 target_flags &= ~MASK_INTERWORK;
3354 }
3355
3356 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3357 {
3358 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3359 target_flags |= MASK_APCS_FRAME;
3360 }
3361
3362 if (TARGET_POKE_FUNCTION_NAME)
3363 target_flags |= MASK_APCS_FRAME;
3364
3365 if (TARGET_APCS_REENT && flag_pic)
3366 error ("-fpic and -mapcs-reent are incompatible");
3367
3368 if (TARGET_APCS_REENT)
3369 warning (0, "APCS reentrant code not supported. Ignored");
3370
3371 /* Initialize boolean versions of the architectural flags, for use
3372 in the arm.md file. */
3373 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3374 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3375 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3376 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3377 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3378 arm_arch5te = arm_arch5e
3379 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3380 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3381 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3382 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3383 arm_arch6m = arm_arch6 && !arm_arch_notm;
3384 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3385 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3386 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3387 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3388 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3389 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3390 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3391 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3392 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3393 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3394 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3395 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3396 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3397 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3398 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3399 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3400 if (arm_fp16_inst)
3401 {
3402 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3403 error ("selected fp16 options are incompatible");
3404 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3405 }
3406
3407
3408 /* Set up some tuning parameters. */
3409 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3410 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3411 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3412 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3413 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3414 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3415
3416 /* And finally, set up some quirks. */
3417 arm_arch_no_volatile_ce
3418 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3419 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3420 isa_bit_quirk_armv6kz);
3421
3422 /* V5 code we generate is completely interworking capable, so we turn off
3423 TARGET_INTERWORK here to avoid many tests later on. */
3424
3425 /* XXX However, we must pass the right pre-processor defines to CPP
3426 or GLD can get confused. This is a hack. */
3427 if (TARGET_INTERWORK)
3428 arm_cpp_interwork = 1;
3429
3430 if (arm_arch5)
3431 target_flags &= ~MASK_INTERWORK;
3432
3433 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3434 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3435
3436 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3437 error ("iwmmxt abi requires an iwmmxt capable cpu");
3438
3439 /* If soft-float is specified then don't use FPU. */
3440 if (TARGET_SOFT_FLOAT)
3441 arm_fpu_attr = FPU_NONE;
3442 else
3443 arm_fpu_attr = FPU_VFP;
3444
3445 if (TARGET_AAPCS_BASED)
3446 {
3447 if (TARGET_CALLER_INTERWORKING)
3448 error ("AAPCS does not support -mcaller-super-interworking");
3449 else
3450 if (TARGET_CALLEE_INTERWORKING)
3451 error ("AAPCS does not support -mcallee-super-interworking");
3452 }
3453
3454 /* __fp16 support currently assumes the core has ldrh. */
3455 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3456 sorry ("__fp16 and no ldrh");
3457
3458 if (TARGET_AAPCS_BASED)
3459 {
3460 if (arm_abi == ARM_ABI_IWMMXT)
3461 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3462 else if (TARGET_HARD_FLOAT_ABI)
3463 {
3464 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3465 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3466 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3467 }
3468 else
3469 arm_pcs_default = ARM_PCS_AAPCS;
3470 }
3471 else
3472 {
3473 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3474 sorry ("-mfloat-abi=hard and VFP");
3475
3476 if (arm_abi == ARM_ABI_APCS)
3477 arm_pcs_default = ARM_PCS_APCS;
3478 else
3479 arm_pcs_default = ARM_PCS_ATPCS;
3480 }
3481
3482 /* For arm2/3 there is no need to do any scheduling if we are doing
3483 software floating-point. */
3484 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3485 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3486
3487 /* Use the cp15 method if it is available. */
3488 if (target_thread_pointer == TP_AUTO)
3489 {
3490 if (arm_arch6k && !TARGET_THUMB1)
3491 target_thread_pointer = TP_CP15;
3492 else
3493 target_thread_pointer = TP_SOFT;
3494 }
3495
3496 /* Override the default structure alignment for AAPCS ABI. */
3497 if (!global_options_set.x_arm_structure_size_boundary)
3498 {
3499 if (TARGET_AAPCS_BASED)
3500 arm_structure_size_boundary = 8;
3501 }
3502 else
3503 {
3504 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3505
3506 if (arm_structure_size_boundary != 8
3507 && arm_structure_size_boundary != 32
3508 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3509 {
3510 if (ARM_DOUBLEWORD_ALIGN)
3511 warning (0,
3512 "structure size boundary can only be set to 8, 32 or 64");
3513 else
3514 warning (0, "structure size boundary can only be set to 8 or 32");
3515 arm_structure_size_boundary
3516 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3517 }
3518 }
3519
3520 if (TARGET_VXWORKS_RTP)
3521 {
3522 if (!global_options_set.x_arm_pic_data_is_text_relative)
3523 arm_pic_data_is_text_relative = 0;
3524 }
3525 else if (flag_pic
3526 && !arm_pic_data_is_text_relative
3527 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3528 /* When text & data segments don't have a fixed displacement, the
3529 intended use is with a single, read only, pic base register.
3530 Unless the user explicitly requested not to do that, set
3531 it. */
3532 target_flags |= MASK_SINGLE_PIC_BASE;
3533
3534 /* If stack checking is disabled, we can use r10 as the PIC register,
3535 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3536 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3537 {
3538 if (TARGET_VXWORKS_RTP)
3539 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3540 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3541 }
3542
3543 if (flag_pic && TARGET_VXWORKS_RTP)
3544 arm_pic_register = 9;
3545
3546 if (arm_pic_register_string != NULL)
3547 {
3548 int pic_register = decode_reg_name (arm_pic_register_string);
3549
3550 if (!flag_pic)
3551 warning (0, "-mpic-register= is useless without -fpic");
3552
3553 /* Prevent the user from choosing an obviously stupid PIC register. */
3554 else if (pic_register < 0 || call_used_regs[pic_register]
3555 || pic_register == HARD_FRAME_POINTER_REGNUM
3556 || pic_register == STACK_POINTER_REGNUM
3557 || pic_register >= PC_REGNUM
3558 || (TARGET_VXWORKS_RTP
3559 && (unsigned int) pic_register != arm_pic_register))
3560 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3561 else
3562 arm_pic_register = pic_register;
3563 }
3564
3565 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3566 if (fix_cm3_ldrd == 2)
3567 {
3568 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3569 fix_cm3_ldrd = 1;
3570 else
3571 fix_cm3_ldrd = 0;
3572 }
3573
3574 /* Hot/Cold partitioning is not currently supported, since we can't
3575 handle literal pool placement in that case. */
3576 if (flag_reorder_blocks_and_partition)
3577 {
3578 inform (input_location,
3579 "-freorder-blocks-and-partition not supported on this architecture");
3580 flag_reorder_blocks_and_partition = 0;
3581 flag_reorder_blocks = 1;
3582 }
3583
3584 if (flag_pic)
3585 /* Hoisting PIC address calculations more aggressively provides a small,
3586 but measurable, size reduction for PIC code. Therefore, we decrease
3587 the bar for unrestricted expression hoisting to the cost of PIC address
3588 calculation, which is 2 instructions. */
3589 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3590 global_options.x_param_values,
3591 global_options_set.x_param_values);
3592
3593 /* ARM EABI defaults to strict volatile bitfields. */
3594 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3595 && abi_version_at_least(2))
3596 flag_strict_volatile_bitfields = 1;
3597
3598 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3599 have deemed it beneficial (signified by setting
3600 prefetch.num_slots to 1 or more). */
3601 if (flag_prefetch_loop_arrays < 0
3602 && HAVE_prefetch
3603 && optimize >= 3
3604 && current_tune->prefetch.num_slots > 0)
3605 flag_prefetch_loop_arrays = 1;
3606
3607 /* Set up parameters to be used in prefetching algorithm. Do not
3608 override the defaults unless we are tuning for a core we have
3609 researched values for. */
3610 if (current_tune->prefetch.num_slots > 0)
3611 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3612 current_tune->prefetch.num_slots,
3613 global_options.x_param_values,
3614 global_options_set.x_param_values);
3615 if (current_tune->prefetch.l1_cache_line_size >= 0)
3616 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3617 current_tune->prefetch.l1_cache_line_size,
3618 global_options.x_param_values,
3619 global_options_set.x_param_values);
3620 if (current_tune->prefetch.l1_cache_size >= 0)
3621 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3622 current_tune->prefetch.l1_cache_size,
3623 global_options.x_param_values,
3624 global_options_set.x_param_values);
3625
3626 /* Use Neon to perform 64-bits operations rather than core
3627 registers. */
3628 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3629 if (use_neon_for_64bits == 1)
3630 prefer_neon_for_64bits = true;
3631
3632 /* Use the alternative scheduling-pressure algorithm by default. */
3633 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3634 global_options.x_param_values,
3635 global_options_set.x_param_values);
3636
3637 /* Look through ready list and all of queue for instructions
3638 relevant for L2 auto-prefetcher. */
3639 int param_sched_autopref_queue_depth;
3640
3641 switch (current_tune->sched_autopref)
3642 {
3643 case tune_params::SCHED_AUTOPREF_OFF:
3644 param_sched_autopref_queue_depth = -1;
3645 break;
3646
3647 case tune_params::SCHED_AUTOPREF_RANK:
3648 param_sched_autopref_queue_depth = 0;
3649 break;
3650
3651 case tune_params::SCHED_AUTOPREF_FULL:
3652 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3653 break;
3654
3655 default:
3656 gcc_unreachable ();
3657 }
3658
3659 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3660 param_sched_autopref_queue_depth,
3661 global_options.x_param_values,
3662 global_options_set.x_param_values);
3663
3664 /* Currently, for slow flash data, we just disable literal pools. We also
3665 disable it for pure-code. */
3666 if (target_slow_flash_data || target_pure_code)
3667 arm_disable_literal_pool = true;
3668
3669 if (use_cmse && !arm_arch_cmse)
3670 error ("target CPU does not support ARMv8-M Security Extensions");
3671
3672 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3673 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3674 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3675 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3676
3677 /* Disable scheduling fusion by default if it's not armv7 processor
3678 or doesn't prefer ldrd/strd. */
3679 if (flag_schedule_fusion == 2
3680 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3681 flag_schedule_fusion = 0;
3682
3683 /* Need to remember the initial options before they are overridden. */
3684 init_optimize = build_optimization_node (&global_options);
3685
3686 arm_option_override_internal (&global_options, &global_options_set);
3687 arm_option_check_internal (&global_options);
3688 arm_option_params_internal ();
3689
3690 /* Create the default target_options structure. */
3691 target_option_default_node = target_option_current_node
3692 = build_target_option_node (&global_options);
3693
3694 /* Register global variables with the garbage collector. */
3695 arm_add_gc_roots ();
3696
3697 /* Init initial mode for testing. */
3698 thumb_flipper = TARGET_THUMB;
3699 }
3700
3701 static void
3702 arm_add_gc_roots (void)
3703 {
3704 gcc_obstack_init(&minipool_obstack);
3705 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3706 }
3707 \f
3708 /* A table of known ARM exception types.
3709 For use with the interrupt function attribute. */
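/* A typical use (illustrative) is:
   void handler (void) __attribute__ ((interrupt ("IRQ")));
   arm_isr_value below matches the string argument against this table;
   both upper- and lower-case spellings are accepted. */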
3710
3711 typedef struct
3712 {
3713 const char *const arg;
3714 const unsigned long return_value;
3715 }
3716 isr_attribute_arg;
3717
3718 static const isr_attribute_arg isr_attribute_args [] =
3719 {
3720 { "IRQ", ARM_FT_ISR },
3721 { "irq", ARM_FT_ISR },
3722 { "FIQ", ARM_FT_FIQ },
3723 { "fiq", ARM_FT_FIQ },
3724 { "ABORT", ARM_FT_ISR },
3725 { "abort", ARM_FT_ISR },
3726 { "ABORT", ARM_FT_ISR },
3727 { "abort", ARM_FT_ISR },
3728 { "UNDEF", ARM_FT_EXCEPTION },
3729 { "undef", ARM_FT_EXCEPTION },
3730 { "SWI", ARM_FT_EXCEPTION },
3731 { "swi", ARM_FT_EXCEPTION },
3732 { NULL, ARM_FT_NORMAL }
3733 };
3734
3735 /* Returns the (interrupt) function type of the current
3736 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3737
3738 static unsigned long
3739 arm_isr_value (tree argument)
3740 {
3741 const isr_attribute_arg * ptr;
3742 const char * arg;
3743
3744 if (!arm_arch_notm)
3745 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3746
3747 /* No argument - default to IRQ. */
3748 if (argument == NULL_TREE)
3749 return ARM_FT_ISR;
3750
3751 /* Get the value of the argument. */
3752 if (TREE_VALUE (argument) == NULL_TREE
3753 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3754 return ARM_FT_UNKNOWN;
3755
3756 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3757
3758 /* Check it against the list of known arguments. */
3759 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3760 if (streq (arg, ptr->arg))
3761 return ptr->return_value;
3762
3763 /* An unrecognized interrupt type. */
3764 return ARM_FT_UNKNOWN;
3765 }
3766
3767 /* Computes the type of the current function. */
3768
3769 static unsigned long
3770 arm_compute_func_type (void)
3771 {
3772 unsigned long type = ARM_FT_UNKNOWN;
3773 tree a;
3774 tree attr;
3775
3776 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3777
3778 /* Decide if the current function is volatile. Such functions
3779 never return, and many memory cycles can be saved by not storing
3780 register values that will never be needed again. This optimization
3781 was added to speed up context switching in a kernel application. */
3782 if (optimize > 0
3783 && (TREE_NOTHROW (current_function_decl)
3784 || !(flag_unwind_tables
3785 || (flag_exceptions
3786 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3787 && TREE_THIS_VOLATILE (current_function_decl))
3788 type |= ARM_FT_VOLATILE;
3789
3790 if (cfun->static_chain_decl != NULL)
3791 type |= ARM_FT_NESTED;
3792
3793 attr = DECL_ATTRIBUTES (current_function_decl);
3794
3795 a = lookup_attribute ("naked", attr);
3796 if (a != NULL_TREE)
3797 type |= ARM_FT_NAKED;
3798
3799 a = lookup_attribute ("isr", attr);
3800 if (a == NULL_TREE)
3801 a = lookup_attribute ("interrupt", attr);
3802
3803 if (a == NULL_TREE)
3804 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3805 else
3806 type |= arm_isr_value (TREE_VALUE (a));
3807
3808 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3809 type |= ARM_FT_CMSE_ENTRY;
3810
3811 return type;
3812 }
3813
3814 /* Returns the type of the current function. */
3815
3816 unsigned long
3817 arm_current_func_type (void)
3818 {
3819 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3820 cfun->machine->func_type = arm_compute_func_type ();
3821
3822 return cfun->machine->func_type;
3823 }
3824
3825 bool
3826 arm_allocate_stack_slots_for_args (void)
3827 {
3828 /* Naked functions should not allocate stack slots for arguments. */
3829 return !IS_NAKED (arm_current_func_type ());
3830 }
3831
3832 static bool
3833 arm_warn_func_return (tree decl)
3834 {
3835 /* Naked functions are implemented entirely in assembly, including the
3836 return sequence, so suppress warnings about this. */
3837 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3838 }
3839
3840 \f
3841 /* Output assembler code for a block containing the constant parts
3842 of a trampoline, leaving space for the variable parts.
3843
3844 On the ARM, (if r8 is the static chain regnum, and remembering that
3845 referencing pc adds an offset of 8) the trampoline looks like:
3846 ldr r8, [pc, #0]
3847 ldr pc, [pc]
3848 .word static chain value
3849 .word function's address
3850 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3851
3852 static void
3853 arm_asm_trampoline_template (FILE *f)
3854 {
3855 fprintf (f, "\t.syntax unified\n");
3856
3857 if (TARGET_ARM)
3858 {
3859 fprintf (f, "\t.arm\n");
3860 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3861 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3862 }
3863 else if (TARGET_THUMB2)
3864 {
3865 fprintf (f, "\t.thumb\n");
3866 /* The Thumb-2 trampoline is similar to the arm implementation.
3867 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3868 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3869 STATIC_CHAIN_REGNUM, PC_REGNUM);
3870 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3871 }
3872 else
3873 {
3874 ASM_OUTPUT_ALIGN (f, 2);
3875 fprintf (f, "\t.code\t16\n");
3876 fprintf (f, ".Ltrampoline_start:\n");
3877 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3878 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3879 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3880 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3881 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3882 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3883 }
3884 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3885 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3886 }
3887
3888 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3889
3890 static void
3891 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3892 {
3893 rtx fnaddr, mem, a_tramp;
3894
3895 emit_block_move (m_tramp, assemble_trampoline_template (),
3896 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3897
3898 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3899 emit_move_insn (mem, chain_value);
3900
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3902 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3903 emit_move_insn (mem, fnaddr);
3904
3905 a_tramp = XEXP (m_tramp, 0);
3906 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3907 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3908 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3909 }
3910
3911 /* Thumb trampolines should be entered in thumb mode, so set
3912 the bottom bit of the address. */
3913
3914 static rtx
3915 arm_trampoline_adjust_address (rtx addr)
3916 {
3917 if (TARGET_THUMB)
3918 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3919 NULL, 0, OPTAB_LIB_WIDEN);
3920 return addr;
3921 }
3922 \f
3923 /* Return 1 if it is possible to return using a single instruction.
3924 If SIBLING is non-null, this is a test for a return before a sibling
3925 call. SIBLING is the call insn, so we can examine its register usage. */
3926
3927 int
3928 use_return_insn (int iscond, rtx sibling)
3929 {
3930 int regno;
3931 unsigned int func_type;
3932 unsigned long saved_int_regs;
3933 unsigned HOST_WIDE_INT stack_adjust;
3934 arm_stack_offsets *offsets;
3935
3936 /* Never use a return instruction before reload has run. */
3937 if (!reload_completed)
3938 return 0;
3939
3940 func_type = arm_current_func_type ();
3941
3942 /* Naked, volatile and stack alignment functions need special
3943 consideration. */
3944 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3945 return 0;
3946
3947 /* So do interrupt functions that use the frame pointer and Thumb
3948 interrupt functions. */
3949 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3950 return 0;
3951
3952 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3953 && !optimize_function_for_size_p (cfun))
3954 return 0;
3955
3956 offsets = arm_get_frame_offsets ();
3957 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3958
3959 /* As do variadic functions. */
3960 if (crtl->args.pretend_args_size
3961 || cfun->machine->uses_anonymous_args
3962 /* Or if the function calls __builtin_eh_return () */
3963 || crtl->calls_eh_return
3964 /* Or if the function calls alloca */
3965 || cfun->calls_alloca
3966 /* Or if there is a stack adjustment. However, if the stack pointer
3967 is saved on the stack, we can use a pre-incrementing stack load. */
3968 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3969 && stack_adjust == 4))
3970 /* Or if the static chain register was saved above the frame, under the
3971 assumption that the stack pointer isn't saved on the stack. */
3972 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3973 && arm_compute_static_chain_stack_bytes() != 0))
3974 return 0;
3975
3976 saved_int_regs = offsets->saved_regs_mask;
3977
3978 /* Unfortunately, the insn
3979
3980 ldmib sp, {..., sp, ...}
3981
3982 triggers a bug on most SA-110 based devices, such that the stack
3983 pointer won't be correctly restored if the instruction takes a
3984 page fault. We work around this problem by popping r3 along with
3985 the other registers, since that is never slower than executing
3986 another instruction.
3987
3988 We test for !arm_arch5 here, because code for any architecture
3989 less than this could potentially be run on one of the buggy
3990 chips. */
3991 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3992 {
3993 /* Validate that r3 is a call-clobbered register (always true in
3994 the default abi) ... */
3995 if (!call_used_regs[3])
3996 return 0;
3997
3998 /* ... that it isn't being used for a return value ... */
3999 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4000 return 0;
4001
4002 /* ... or for a tail-call argument ... */
4003 if (sibling)
4004 {
4005 gcc_assert (CALL_P (sibling));
4006
4007 if (find_regno_fusage (sibling, USE, 3))
4008 return 0;
4009 }
4010
4011 /* ... and that there are no call-saved registers in r0-r2
4012 (always true in the default ABI). */
4013 if (saved_int_regs & 0x7)
4014 return 0;
4015 }
4016
4017 /* Can't be done if interworking with Thumb, and any registers have been
4018 stacked. */
4019 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4020 return 0;
4021
4022 /* On StrongARM, conditional returns are expensive if they aren't
4023 taken and multiple registers have been stacked. */
4024 if (iscond && arm_tune_strongarm)
4025 {
4026 /* Conditional return when just the LR is stored is a simple
4027 conditional-load instruction, that's not expensive. */
4028 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4029 return 0;
4030
4031 if (flag_pic
4032 && arm_pic_register != INVALID_REGNUM
4033 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4034 return 0;
4035 }
4036
4037 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4038 several instructions if anything needs to be popped. */
4039 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4040 return 0;
4041
4042 /* If there are saved registers but the LR isn't saved, then we need
4043 two instructions for the return. */
4044 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4045 return 0;
4046
4047 /* Can't be done if any of the VFP regs are pushed,
4048 since this also requires an insn. */
4049 if (TARGET_HARD_FLOAT)
4050 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4051 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4052 return 0;
4053
4054 if (TARGET_REALLY_IWMMXT)
4055 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4056 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4057 return 0;
4058
4059 return 1;
4060 }
4061
4062 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4063 shrink-wrapping if possible. This is the case if we need to emit a
4064 prologue, which we can test by looking at the offsets. */
4065 bool
4066 use_simple_return_p (void)
4067 {
4068 arm_stack_offsets *offsets;
4069
4070 /* Note this function can be called before or after reload. */
4071 if (!reload_completed)
4072 arm_compute_frame_layout ();
4073
4074 offsets = arm_get_frame_offsets ();
4075 return offsets->outgoing_args != 0;
4076 }
4077
4078 /* Return TRUE if int I is a valid immediate ARM constant. */
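/* In ARM state a valid immediate is an 8-bit value rotated right by an
   even amount. For example (illustrative values): 0xff, 0x00ff0000 and
   0xff000000 are all encodable, while 0x101 and 0x00012345 are not.
   Thumb-2 additionally accepts the replicated byte patterns handled
   below. */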
4079
4080 int
4081 const_ok_for_arm (HOST_WIDE_INT i)
4082 {
4083 int lowbit;
4084
4085 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4086 be all zero, or all one. */
4087 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4088 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4089 != ((~(unsigned HOST_WIDE_INT) 0)
4090 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4091 return FALSE;
4092
4093 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4094
4095 /* Fast return for 0 and small values. We must do this for zero, since
4096 the code below can't handle that one case. */
4097 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4098 return TRUE;
4099
4100 /* Get the number of trailing zeros. */
4101 lowbit = ffs((int) i) - 1;
4102
4103 /* Only even shifts are allowed in ARM mode so round down to the
4104 nearest even number. */
4105 if (TARGET_ARM)
4106 lowbit &= ~1;
4107
4108 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4109 return TRUE;
4110
4111 if (TARGET_ARM)
4112 {
4113 /* Allow rotated constants in ARM mode. */
4114 if (lowbit <= 4
4115 && ((i & ~0xc000003f) == 0
4116 || (i & ~0xf000000f) == 0
4117 || (i & ~0xfc000003) == 0))
4118 return TRUE;
4119 }
4120 else if (TARGET_THUMB2)
4121 {
4122 HOST_WIDE_INT v;
4123
4124 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
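/* E.g. (illustrative) 0x00120012 and 0x34343434 match these patterns;
   0x56005600 is caught by the 0xXY00XY00 check below. */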
4125 v = i & 0xff;
4126 v |= v << 16;
4127 if (i == v || i == (v | (v << 8)))
4128 return TRUE;
4129
4130 /* Allow repeated pattern 0xXY00XY00. */
4131 v = i & 0xff00;
4132 v |= v << 16;
4133 if (i == v)
4134 return TRUE;
4135 }
4136 else if (TARGET_HAVE_MOVT)
4137 {
4138 /* Thumb-1 Targets with MOVT. */
4139 if (i > 0xffff)
4140 return FALSE;
4141 else
4142 return TRUE;
4143 }
4144
4145 return FALSE;
4146 }
4147
4148 /* Return true if I is a valid constant for the operation CODE. */
4149 int
4150 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4151 {
4152 if (const_ok_for_arm (i))
4153 return 1;
4154
4155 switch (code)
4156 {
4157 case SET:
4158 /* See if we can use movw. */
4159 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4160 return 1;
4161 else
4162 /* Otherwise, try mvn. */
4163 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4164
4165 case PLUS:
4166 /* See if we can use addw or subw. */
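/* addw/subw take a 12-bit immediate, so (illustrative) any constant in
   the range 0..4095, or whose negation is in that range, is accepted
   here. */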
4167 if (TARGET_THUMB2
4168 && ((i & 0xfffff000) == 0
4169 || ((-i) & 0xfffff000) == 0))
4170 return 1;
4171 /* Fall through. */
4172 case COMPARE:
4173 case EQ:
4174 case NE:
4175 case GT:
4176 case LE:
4177 case LT:
4178 case GE:
4179 case GEU:
4180 case LTU:
4181 case GTU:
4182 case LEU:
4183 case UNORDERED:
4184 case ORDERED:
4185 case UNEQ:
4186 case UNGE:
4187 case UNLT:
4188 case UNGT:
4189 case UNLE:
4190 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4191
4192 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4193 case XOR:
4194 return 0;
4195
4196 case IOR:
4197 if (TARGET_THUMB2)
4198 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4199 return 0;
4200
4201 case AND:
4202 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4203
4204 default:
4205 gcc_unreachable ();
4206 }
4207 }
4208
4209 /* Return true if I is a valid di mode constant for the operation CODE. */
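/* E.g. (illustrative) for a DImode AND, 0xffffffff000000ff is accepted:
   the high word is all ones (that half of the AND is a no-op) and the
   low word is itself a valid immediate. */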
4210 int
4211 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4212 {
4213 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4214 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4215 rtx hi = GEN_INT (hi_val);
4216 rtx lo = GEN_INT (lo_val);
4217
4218 if (TARGET_THUMB1)
4219 return 0;
4220
4221 switch (code)
4222 {
4223 case AND:
4224 case IOR:
4225 case XOR:
4226 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4227 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4228 case PLUS:
4229 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4230
4231 default:
4232 return 0;
4233 }
4234 }
4235
4236 /* Emit a sequence of insns to handle a large constant.
4237 CODE is the code of the operation required, it can be any of SET, PLUS,
4238 IOR, AND, XOR, MINUS;
4239 MODE is the mode in which the operation is being performed;
4240 VAL is the integer to operate on;
4241 SOURCE is the other operand (a register, or a null-pointer for SET);
4242 SUBTARGETS means it is safe to create scratch registers if that will
4243 either produce a simpler sequence, or we will want to cse the values.
4244 Return value is the number of insns emitted. */
4245
4246 /* ??? Tweak this for thumb2. */
4247 int
4248 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4249 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4250 {
4251 rtx cond;
4252
4253 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4254 cond = COND_EXEC_TEST (PATTERN (insn));
4255 else
4256 cond = NULL_RTX;
4257
4258 if (subtargets || code == SET
4259 || (REG_P (target) && REG_P (source)
4260 && REGNO (target) != REGNO (source)))
4261 {
4262 /* After arm_reorg has been called, we can't fix up expensive
4263 constants by pushing them into memory so we must synthesize
4264 them in-line, regardless of the cost. This is only likely to
4265 be more costly on chips that have load delay slots and we are
4266 compiling without running the scheduler (so no splitting
4267 occurred before the final instruction emission).
4268
4269 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4270 */
4271 if (!cfun->machine->after_arm_reorg
4272 && !cond
4273 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4274 1, 0)
4275 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4276 + (code != SET))))
4277 {
4278 if (code == SET)
4279 {
4280 /* Currently SET is the only monadic value for CODE, all
4281 the rest are dyadic. */
4282 if (TARGET_USE_MOVT)
4283 arm_emit_movpair (target, GEN_INT (val));
4284 else
4285 emit_set_insn (target, GEN_INT (val));
4286
4287 return 1;
4288 }
4289 else
4290 {
4291 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4292
4293 if (TARGET_USE_MOVT)
4294 arm_emit_movpair (temp, GEN_INT (val));
4295 else
4296 emit_set_insn (temp, GEN_INT (val));
4297
4298 /* For MINUS, the value is subtracted from, since we never
4299 have subtraction of a constant. */
4300 if (code == MINUS)
4301 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4302 else
4303 emit_set_insn (target,
4304 gen_rtx_fmt_ee (code, mode, source, temp));
4305 return 2;
4306 }
4307 }
4308 }
4309
4310 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4311 1);
4312 }
4313
4314 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4315 ARM/THUMB2 immediates, and add up to VAL.
4316 The return value gives the number of insns required. */
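/* For example (illustrative), in ARM state 0x12345678 is split into four
   8-bit rotated immediates:
   0x12000000 + 0x00340000 + 0x00005600 + 0x00000078
   so four insns are required for that value. */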
4317 static int
4318 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4319 struct four_ints *return_sequence)
4320 {
4321 int best_consecutive_zeros = 0;
4322 int i;
4323 int best_start = 0;
4324 int insns1, insns2;
4325 struct four_ints tmp_sequence;
4326
4327 /* If we aren't targeting ARM, the best place to start is always at
4328 the bottom, otherwise look more closely. */
4329 if (TARGET_ARM)
4330 {
4331 for (i = 0; i < 32; i += 2)
4332 {
4333 int consecutive_zeros = 0;
4334
4335 if (!(val & (3 << i)))
4336 {
4337 while ((i < 32) && !(val & (3 << i)))
4338 {
4339 consecutive_zeros += 2;
4340 i += 2;
4341 }
4342 if (consecutive_zeros > best_consecutive_zeros)
4343 {
4344 best_consecutive_zeros = consecutive_zeros;
4345 best_start = i - consecutive_zeros;
4346 }
4347 i -= 2;
4348 }
4349 }
4350 }
4351
4352 /* So long as it won't require any more insns to do so, it's
4353 desirable to emit a small constant (in bits 0...9) in the last
4354 insn. This way there is more chance that it can be combined with
4355 a later addressing insn to form a pre-indexed load or store
4356 operation. Consider:
4357
4358 *((volatile int *)0xe0000100) = 1;
4359 *((volatile int *)0xe0000110) = 2;
4360
4361 We want this to wind up as:
4362
4363 mov rA, #0xe0000000
4364 mov rB, #1
4365 str rB, [rA, #0x100]
4366 mov rB, #2
4367 str rB, [rA, #0x110]
4368
4369 rather than having to synthesize both large constants from scratch.
4370
4371 Therefore, we calculate how many insns would be required to emit
4372 the constant starting from `best_start', and also starting from
4373 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4374 yield a shorter sequence, we may as well use zero. */
4375 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4376 if (best_start != 0
4377 && ((HOST_WIDE_INT_1U << best_start) < val))
4378 {
4379 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4380 if (insns2 <= insns1)
4381 {
4382 *return_sequence = tmp_sequence;
4383 insns1 = insns2;
4384 }
4385 }
4386
4387 return insns1;
4388 }
4389
4390 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4391 static int
4392 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4393 struct four_ints *return_sequence, int i)
4394 {
4395 int remainder = val & 0xffffffff;
4396 int insns = 0;
4397
4398 /* Try and find a way of doing the job in either two or three
4399 instructions.
4400
4401 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4402 location. We start at position I. This may be the MSB, or
4403 optimal_immediate_sequence may have positioned it at the largest block
4404 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4405 wrapping around to the top of the word when we drop off the bottom.
4406 In the worst case this code should produce no more than four insns.
4407
4408 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4409 constants, shifted to any arbitrary location. We should always start
4410 at the MSB. */
4411 do
4412 {
4413 int end;
4414 unsigned int b1, b2, b3, b4;
4415 unsigned HOST_WIDE_INT result;
4416 int loc;
4417
4418 gcc_assert (insns < 4);
4419
4420 if (i <= 0)
4421 i += 32;
4422
4423 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4424 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4425 {
4426 loc = i;
4427 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4428 /* We can use addw/subw for the last 12 bits. */
4429 result = remainder;
4430 else
4431 {
4432 /* Use an 8-bit shifted/rotated immediate. */
4433 end = i - 8;
4434 if (end < 0)
4435 end += 32;
4436 result = remainder & ((0x0ff << end)
4437 | ((i < end) ? (0xff >> (32 - end))
4438 : 0));
4439 i -= 8;
4440 }
4441 }
4442 else
4443 {
4444 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4445 arbitrary shifts. */
4446 i -= TARGET_ARM ? 2 : 1;
4447 continue;
4448 }
4449
4450 /* Next, see if we can do a better job with a thumb2 replicated
4451 constant.
4452
4453 We do it this way around to catch the cases like 0x01F001E0 where
4454 two 8-bit immediates would work, but a replicated constant would
4455 make it worse.
4456
4457 TODO: 16-bit constants that don't clear all the bits, but still win.
4458 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4459 if (TARGET_THUMB2)
4460 {
4461 b1 = (remainder & 0xff000000) >> 24;
4462 b2 = (remainder & 0x00ff0000) >> 16;
4463 b3 = (remainder & 0x0000ff00) >> 8;
4464 b4 = remainder & 0xff;
4465
4466 if (loc > 24)
4467 {
4468 /* The 8-bit immediate already found clears b1 (and maybe b2),
4469 but must leave b3 and b4 alone. */
4470
4471 /* First try to find a 32-bit replicated constant that clears
4472 almost everything. We can assume that we can't do it in one,
4473 or else we wouldn't be here. */
4474 unsigned int tmp = b1 & b2 & b3 & b4;
4475 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4476 + (tmp << 24);
4477 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4478 + (tmp == b3) + (tmp == b4);
4479 if (tmp
4480 && (matching_bytes >= 3
4481 || (matching_bytes == 2
4482 && const_ok_for_op (remainder & ~tmp2, code))))
4483 {
4484 /* At least 3 of the bytes match, and the fourth has at
4485 least as many bits set, or two of the bytes match
4486 and it will only require one more insn to finish. */
4487 result = tmp2;
4488 i = tmp != b1 ? 32
4489 : tmp != b2 ? 24
4490 : tmp != b3 ? 16
4491 : 8;
4492 }
4493
4494 /* Second, try to find a 16-bit replicated constant that can
4495 leave three of the bytes clear. If b2 or b4 is already
4496 zero, then we can. If the 8-bit from above would not
4497 clear b2 anyway, then we still win. */
4498 else if (b1 == b3 && (!b2 || !b4
4499 || (remainder & 0x00ff0000 & ~result)))
4500 {
4501 result = remainder & 0xff00ff00;
4502 i = 24;
4503 }
4504 }
4505 else if (loc > 16)
4506 {
4507 /* The 8-bit immediate already found clears b2 (and maybe b3)
4508 and we don't get here unless b1 is already clear, but it will
4509 leave b4 unchanged. */
4510
4511 /* If we can clear b2 and b4 at once, then we win, since the
4512 8-bits couldn't possibly reach that far. */
4513 if (b2 == b4)
4514 {
4515 result = remainder & 0x00ff00ff;
4516 i = 16;
4517 }
4518 }
4519 }
4520
4521 return_sequence->i[insns++] = result;
4522 remainder &= ~result;
4523
4524 if (code == SET || code == MINUS)
4525 code = PLUS;
4526 }
4527 while (remainder);
4528
4529 return insns;
4530 }
4531
4532 /* Emit an instruction with the indicated PATTERN. If COND is
4533 non-NULL, conditionalize the execution of the instruction on COND
4534 being true. */
4535
4536 static void
4537 emit_constant_insn (rtx cond, rtx pattern)
4538 {
4539 if (cond)
4540 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4541 emit_insn (pattern);
4542 }
4543
4544 /* As above, but extra parameter GENERATE which, if clear, suppresses
4545 RTL generation. */
4546
4547 static int
4548 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4549 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4550 int subtargets, int generate)
4551 {
4552 int can_invert = 0;
4553 int can_negate = 0;
4554 int final_invert = 0;
4555 int i;
4556 int set_sign_bit_copies = 0;
4557 int clear_sign_bit_copies = 0;
4558 int clear_zero_bit_copies = 0;
4559 int set_zero_bit_copies = 0;
4560 int insns = 0, neg_insns, inv_insns;
4561 unsigned HOST_WIDE_INT temp1, temp2;
4562 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4563 struct four_ints *immediates;
4564 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4565
4566 /* Find out which operations are safe for a given CODE. Also do a quick
4567 check for degenerate cases; these can occur when DImode operations
4568 are split. */
4569 switch (code)
4570 {
4571 case SET:
4572 can_invert = 1;
4573 break;
4574
4575 case PLUS:
4576 can_negate = 1;
4577 break;
4578
4579 case IOR:
4580 if (remainder == 0xffffffff)
4581 {
4582 if (generate)
4583 emit_constant_insn (cond,
4584 gen_rtx_SET (target,
4585 GEN_INT (ARM_SIGN_EXTEND (val))));
4586 return 1;
4587 }
4588
4589 if (remainder == 0)
4590 {
4591 if (reload_completed && rtx_equal_p (target, source))
4592 return 0;
4593
4594 if (generate)
4595 emit_constant_insn (cond, gen_rtx_SET (target, source));
4596 return 1;
4597 }
4598 break;
4599
4600 case AND:
4601 if (remainder == 0)
4602 {
4603 if (generate)
4604 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4605 return 1;
4606 }
4607 if (remainder == 0xffffffff)
4608 {
4609 if (reload_completed && rtx_equal_p (target, source))
4610 return 0;
4611 if (generate)
4612 emit_constant_insn (cond, gen_rtx_SET (target, source));
4613 return 1;
4614 }
4615 can_invert = 1;
4616 break;
4617
4618 case XOR:
4619 if (remainder == 0)
4620 {
4621 if (reload_completed && rtx_equal_p (target, source))
4622 return 0;
4623 if (generate)
4624 emit_constant_insn (cond, gen_rtx_SET (target, source));
4625 return 1;
4626 }
4627
4628 if (remainder == 0xffffffff)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NOT (mode, source)));
4634 return 1;
4635 }
4636 final_invert = 1;
4637 break;
4638
4639 case MINUS:
4640 /* We treat MINUS as (val - source), since (source - val) is always
4641 passed as (source + (-val)). */
4642 if (remainder == 0)
4643 {
4644 if (generate)
4645 emit_constant_insn (cond,
4646 gen_rtx_SET (target,
4647 gen_rtx_NEG (mode, source)));
4648 return 1;
4649 }
4650 if (const_ok_for_arm (val))
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 gen_rtx_MINUS (mode, GEN_INT (val),
4656 source)));
4657 return 1;
4658 }
4659
4660 break;
4661
4662 default:
4663 gcc_unreachable ();
4664 }
4665
4666 /* If we can do it in one insn get out quickly. */
4667 if (const_ok_for_op (val, code))
4668 {
4669 if (generate)
4670 emit_constant_insn (cond,
4671 gen_rtx_SET (target,
4672 (source
4673 ? gen_rtx_fmt_ee (code, mode, source,
4674 GEN_INT (val))
4675 : GEN_INT (val))));
4676 return 1;
4677 }
4678
4679 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4680 insn. */
4681 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4682 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4683 {
4684 if (generate)
4685 {
4686 if (mode == SImode && i == 16)
4687 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4688 smaller insn. */
4689 emit_constant_insn (cond,
4690 gen_zero_extendhisi2
4691 (target, gen_lowpart (HImode, source)));
4692 else
4693 /* Extz only supports SImode, but we can coerce the operands
4694 into that mode. */
4695 emit_constant_insn (cond,
4696 gen_extzv_t2 (gen_lowpart (SImode, target),
4697 gen_lowpart (SImode, source),
4698 GEN_INT (i), const0_rtx));
4699 }
4700
4701 return 1;
4702 }
4703
4704 /* Calculate a few attributes that may be useful for specific
4705 optimizations. */
4706 /* Count number of leading zeros. */
4707 for (i = 31; i >= 0; i--)
4708 {
4709 if ((remainder & (1 << i)) == 0)
4710 clear_sign_bit_copies++;
4711 else
4712 break;
4713 }
4714
4715 /* Count number of leading 1's. */
4716 for (i = 31; i >= 0; i--)
4717 {
4718 if ((remainder & (1 << i)) != 0)
4719 set_sign_bit_copies++;
4720 else
4721 break;
4722 }
4723
4724 /* Count number of trailing zero's. */
4725 for (i = 0; i <= 31; i++)
4726 {
4727 if ((remainder & (1 << i)) == 0)
4728 clear_zero_bit_copies++;
4729 else
4730 break;
4731 }
4732
4733 /* Count number of trailing 1's. */
4734 for (i = 0; i <= 31; i++)
4735 {
4736 if ((remainder & (1 << i)) != 0)
4737 set_zero_bit_copies++;
4738 else
4739 break;
4740 }
4741
4742 switch (code)
4743 {
4744 case SET:
4745 /* See if we can do this by sign_extending a constant that is known
4746 to be negative. This is a good way of doing it, since the shift
4747 may well merge into a subsequent insn. */
4748 if (set_sign_bit_copies > 1)
4749 {
4750 if (const_ok_for_arm
4751 (temp1 = ARM_SIGN_EXTEND (remainder
4752 << (set_sign_bit_copies - 1))))
4753 {
4754 if (generate)
4755 {
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_ashrsi3 (target, new_src,
4761 GEN_INT (set_sign_bit_copies - 1)));
4762 }
4763 return 2;
4764 }
4765 /* For an inverted constant, we will need to set the low bits,
4766 these will be shifted out of harm's way. */
4767 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4768 if (const_ok_for_arm (~temp1))
4769 {
4770 if (generate)
4771 {
4772 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4773 emit_constant_insn (cond,
4774 gen_rtx_SET (new_src, GEN_INT (temp1)));
4775 emit_constant_insn (cond,
4776 gen_ashrsi3 (target, new_src,
4777 GEN_INT (set_sign_bit_copies - 1)));
4778 }
4779 return 2;
4780 }
4781 }
4782
4783 /* See if we can calculate the value as the difference between two
4784 valid immediates. */
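/* E.g. (illustrative) 0x0001fffe is not encodable directly, but it
   equals 0x20000 - 2, so it can be built with a MOV of one valid
   immediate followed by a SUB of another. */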
4785 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4786 {
4787 int topshift = clear_sign_bit_copies & ~1;
4788
4789 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4790 & (0xff000000 >> topshift));
4791
4792 /* If temp1 is zero, then that means the 9 most significant
4793 bits of remainder were 1 and we've caused it to overflow.
4794 When topshift is 0 we don't need to do anything since we
4795 can borrow from 'bit 32'. */
4796 if (temp1 == 0 && topshift != 0)
4797 temp1 = 0x80000000 >> (topshift - 1);
4798
4799 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4800
4801 if (const_ok_for_arm (temp2))
4802 {
4803 if (generate)
4804 {
4805 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4806 emit_constant_insn (cond,
4807 gen_rtx_SET (new_src, GEN_INT (temp1)));
4808 emit_constant_insn (cond,
4809 gen_addsi3 (target, new_src,
4810 GEN_INT (-temp2)));
4811 }
4812
4813 return 2;
4814 }
4815 }
4816
4817 /* See if we can generate this by setting the bottom (or the top)
4818 16 bits, and then shifting these into the other half of the
4819 word. We only look for the simplest cases, to do more would cost
4820 too much. Be careful, however, not to generate this when the
4821 alternative would take fewer insns. */
4822 if (val & 0xffff0000)
4823 {
4824 temp1 = remainder & 0xffff0000;
4825 temp2 = remainder & 0x0000ffff;
4826
4827 /* Overlaps outside this range are best done using other methods. */
4828 for (i = 9; i < 24; i++)
4829 {
4830 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4831 && !const_ok_for_arm (temp2))
4832 {
4833 rtx new_src = (subtargets
4834 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4835 : target);
4836 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4837 source, subtargets, generate);
4838 source = new_src;
4839 if (generate)
4840 emit_constant_insn
4841 (cond,
4842 gen_rtx_SET
4843 (target,
4844 gen_rtx_IOR (mode,
4845 gen_rtx_ASHIFT (mode, source,
4846 GEN_INT (i)),
4847 source)));
4848 return insns + 1;
4849 }
4850 }
4851
4852 /* Don't duplicate cases already considered. */
4853 for (i = 17; i < 24; i++)
4854 {
4855 if (((temp1 | (temp1 >> i)) == remainder)
4856 && !const_ok_for_arm (temp1))
4857 {
4858 rtx new_src = (subtargets
4859 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4860 : target);
4861 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4862 source, subtargets, generate);
4863 source = new_src;
4864 if (generate)
4865 emit_constant_insn
4866 (cond,
4867 gen_rtx_SET (target,
4868 gen_rtx_IOR
4869 (mode,
4870 gen_rtx_LSHIFTRT (mode, source,
4871 GEN_INT (i)),
4872 source)));
4873 return insns + 1;
4874 }
4875 }
4876 }
4877 break;
4878
4879 case IOR:
4880 case XOR:
4881 /* If we have IOR or XOR, and the constant can be loaded in a
4882 single instruction, and we can find a temporary to put it in,
4883 then this can be done in two instructions instead of 3-4. */
4884 if (subtargets
4885 /* TARGET can't be NULL if SUBTARGETS is 0. */
4886 || (reload_completed && !reg_mentioned_p (target, source)))
4887 {
4888 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4889 {
4890 if (generate)
4891 {
4892 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4893
4894 emit_constant_insn (cond,
4895 gen_rtx_SET (sub, GEN_INT (val)));
4896 emit_constant_insn (cond,
4897 gen_rtx_SET (target,
4898 gen_rtx_fmt_ee (code, mode,
4899 source, sub)));
4900 }
4901 return 2;
4902 }
4903 }
4904
4905 if (code == XOR)
4906 break;
4907
4908 /* Convert.
4909 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4910 and the remainder 0s for e.g. 0xfff00000)
4911 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4912
4913 This can be done in 2 instructions by using shifts with mov or mvn.
4914 e.g. for
4915 x = x | 0xfff00000;
4916 we generate.
4917 mvn r0, r0, asl #12
4918 mvn r0, r0, lsr #12 */
4919 if (set_sign_bit_copies > 8
4920 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4921 {
4922 if (generate)
4923 {
4924 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4925 rtx shift = GEN_INT (set_sign_bit_copies);
4926
4927 emit_constant_insn
4928 (cond,
4929 gen_rtx_SET (sub,
4930 gen_rtx_NOT (mode,
4931 gen_rtx_ASHIFT (mode,
4932 source,
4933 shift))));
4934 emit_constant_insn
4935 (cond,
4936 gen_rtx_SET (target,
4937 gen_rtx_NOT (mode,
4938 gen_rtx_LSHIFTRT (mode, sub,
4939 shift))));
4940 }
4941 return 2;
4942 }
4943
4944 /* Convert
4945 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4946 to
4947 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4948
4949 E.g. for r0 = r0 | 0xfff we generate
4950 mvn r0, r0, lsr #12
4951 mvn r0, r0, asl #12
4952
4953 */
4954 if (set_zero_bit_copies > 8
4955 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4956 {
4957 if (generate)
4958 {
4959 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4960 rtx shift = GEN_INT (set_zero_bit_copies);
4961
4962 emit_constant_insn
4963 (cond,
4964 gen_rtx_SET (sub,
4965 gen_rtx_NOT (mode,
4966 gen_rtx_LSHIFTRT (mode,
4967 source,
4968 shift))));
4969 emit_constant_insn
4970 (cond,
4971 gen_rtx_SET (target,
4972 gen_rtx_NOT (mode,
4973 gen_rtx_ASHIFT (mode, sub,
4974 shift))));
4975 }
4976 return 2;
4977 }
4978
4979 /* This will never be reached for Thumb2 because orn is a valid
4980 instruction. This is for Thumb1 and the ARM 32 bit cases.
4981
4982 x = y | constant (such that ~constant is a valid constant)
4983 Transform this to
4984 x = ~(~y & ~constant).
4985 */
4986 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4987 {
4988 if (generate)
4989 {
4990 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4991 emit_constant_insn (cond,
4992 gen_rtx_SET (sub,
4993 gen_rtx_NOT (mode, source)));
4994 source = sub;
4995 if (subtargets)
4996 sub = gen_reg_rtx (mode);
4997 emit_constant_insn (cond,
4998 gen_rtx_SET (sub,
4999 gen_rtx_AND (mode, source,
5000 GEN_INT (temp1))));
5001 emit_constant_insn (cond,
5002 gen_rtx_SET (target,
5003 gen_rtx_NOT (mode, sub)));
5004 }
5005 return 3;
5006 }
5007 break;
5008
5009 case AND:
5010 /* See if two shifts will do 2 or more insns' worth of work. */
5011 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5012 {
5013 HOST_WIDE_INT shift_mask = ((0xffffffff
5014 << (32 - clear_sign_bit_copies))
5015 & 0xffffffff);
5016
5017 if ((remainder | shift_mask) != 0xffffffff)
5018 {
5019 HOST_WIDE_INT new_val
5020 = ARM_SIGN_EXTEND (remainder | shift_mask);
5021
5022 if (generate)
5023 {
5024 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5025 insns = arm_gen_constant (AND, SImode, cond, new_val,
5026 new_src, source, subtargets, 1);
5027 source = new_src;
5028 }
5029 else
5030 {
5031 rtx targ = subtargets ? NULL_RTX : target;
5032 insns = arm_gen_constant (AND, mode, cond, new_val,
5033 targ, source, subtargets, 0);
5034 }
5035 }
5036
5037 if (generate)
5038 {
5039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5040 rtx shift = GEN_INT (clear_sign_bit_copies);
5041
5042 emit_insn (gen_ashlsi3 (new_src, source, shift));
5043 emit_insn (gen_lshrsi3 (target, new_src, shift));
5044 }
5045
5046 return insns + 2;
5047 }
5048
5049 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5050 {
5051 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5052
5053 if ((remainder | shift_mask) != 0xffffffff)
5054 {
5055 HOST_WIDE_INT new_val
5056 = ARM_SIGN_EXTEND (remainder | shift_mask);
5057 if (generate)
5058 {
5059 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5060
5061 insns = arm_gen_constant (AND, mode, cond, new_val,
5062 new_src, source, subtargets, 1);
5063 source = new_src;
5064 }
5065 else
5066 {
5067 rtx targ = subtargets ? NULL_RTX : target;
5068
5069 insns = arm_gen_constant (AND, mode, cond, new_val,
5070 targ, source, subtargets, 0);
5071 }
5072 }
5073
5074 if (generate)
5075 {
5076 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5077 rtx shift = GEN_INT (clear_zero_bit_copies);
5078
5079 emit_insn (gen_lshrsi3 (new_src, source, shift));
5080 emit_insn (gen_ashlsi3 (target, new_src, shift));
5081 }
5082
5083 return insns + 2;
5084 }
5085
5086 break;
5087
5088 default:
5089 break;
5090 }
5091
5092 /* Calculate what the instruction sequences would be if we generated it
5093 normally, negated, or inverted. */
5094 if (code == AND)
5095 /* AND cannot be split into multiple insns, so invert and use BIC. */
5096 insns = 99;
5097 else
5098 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5099
5100 if (can_negate)
5101 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5102 &neg_immediates);
5103 else
5104 neg_insns = 99;
5105
5106 if (can_invert || final_invert)
5107 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5108 &inv_immediates);
5109 else
5110 inv_insns = 99;
5111
5112 immediates = &pos_immediates;
5113
5114 /* Is the negated immediate sequence more efficient? */
5115 if (neg_insns < insns && neg_insns <= inv_insns)
5116 {
5117 insns = neg_insns;
5118 immediates = &neg_immediates;
5119 }
5120 else
5121 can_negate = 0;
5122
5123 /* Is the inverted immediate sequence more efficient?
5124 We must allow for an extra NOT instruction for XOR operations, although
5125 there is some chance that the final 'mvn' will get optimized later. */
5126 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5127 {
5128 insns = inv_insns;
5129 immediates = &inv_immediates;
5130 }
5131 else
5132 {
5133 can_invert = 0;
5134 final_invert = 0;
5135 }
5136
5137 /* Now output the chosen sequence as instructions. */
5138 if (generate)
5139 {
5140 for (i = 0; i < insns; i++)
5141 {
5142 rtx new_src, temp1_rtx;
5143
5144 temp1 = immediates->i[i];
5145
5146 if (code == SET || code == MINUS)
5147 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5148 else if ((final_invert || i < (insns - 1)) && subtargets)
5149 new_src = gen_reg_rtx (mode);
5150 else
5151 new_src = target;
5152
5153 if (can_invert)
5154 temp1 = ~temp1;
5155 else if (can_negate)
5156 temp1 = -temp1;
5157
5158 temp1 = trunc_int_for_mode (temp1, mode);
5159 temp1_rtx = GEN_INT (temp1);
5160
5161 if (code == SET)
5162 ;
5163 else if (code == MINUS)
5164 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5165 else
5166 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5167
5168 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5169 source = new_src;
5170
5171 if (code == SET)
5172 {
5173 can_negate = can_invert;
5174 can_invert = 0;
5175 code = PLUS;
5176 }
5177 else if (code == MINUS)
5178 code = PLUS;
5179 }
5180 }
5181
5182 if (final_invert)
5183 {
5184 if (generate)
5185 emit_constant_insn (cond, gen_rtx_SET (target,
5186 gen_rtx_NOT (mode, source)));
5187 insns++;
5188 }
5189
5190 return insns;
5191 }
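
/* A rough worked example of the selection above: for x &= 0xfffe0f0f
   the positive path is abandoned up front (AND is forced to 99 insns),
   but the inverted remainder 0x0001f0f0 splits into two valid
   immediates such as 0x1f000 and 0xf0, so the whole operation is
   emitted as two BIC-style AND instructions instead of loading the
   mask from the constant pool.  */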
5192
5193 /* Canonicalize a comparison so that we are more likely to recognize it.
5194 This can be done for a few constant compares, where we can make the
5195 immediate value easier to load. */
5196
5197 static void
5198 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5199 bool op0_preserve_value)
5200 {
5201 machine_mode mode;
5202 unsigned HOST_WIDE_INT i, maxval;
5203
5204 mode = GET_MODE (*op0);
5205 if (mode == VOIDmode)
5206 mode = GET_MODE (*op1);
5207
5208 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5209
5210 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5211 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5212 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5213 for GTU/LEU in Thumb mode. */
5214 if (mode == DImode)
5215 {
5216
5217 if (*code == GT || *code == LE
5218 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5219 {
5220 /* Missing comparison. First try to use an available
5221 comparison. */
5222 if (CONST_INT_P (*op1))
5223 {
5224 i = INTVAL (*op1);
5225 switch (*code)
5226 {
5227 case GT:
5228 case LE:
5229 if (i != maxval
5230 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5231 {
5232 *op1 = GEN_INT (i + 1);
5233 *code = *code == GT ? GE : LT;
5234 return;
5235 }
5236 break;
5237 case GTU:
5238 case LEU:
5239 if (i != ~((unsigned HOST_WIDE_INT) 0)
5240 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5241 {
5242 *op1 = GEN_INT (i + 1);
5243 *code = *code == GTU ? GEU : LTU;
5244 return;
5245 }
5246 break;
5247 default:
5248 gcc_unreachable ();
5249 }
5250 }
5251
5252 /* If that did not work, reverse the condition. */
5253 if (!op0_preserve_value)
5254 {
5255 std::swap (*op0, *op1);
5256 *code = (int)swap_condition ((enum rtx_code)*code);
5257 }
5258 }
5259 return;
5260 }
5261
5262 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5263 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5264 to facilitate possible combining with a cmp into 'ands'. */
5265 if (mode == SImode
5266 && GET_CODE (*op0) == ZERO_EXTEND
5267 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5268 && GET_MODE (XEXP (*op0, 0)) == QImode
5269 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5270 && subreg_lowpart_p (XEXP (*op0, 0))
5271 && *op1 == const0_rtx)
5272 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5273 GEN_INT (255));
5274
5275 /* Comparisons smaller than DImode. Only adjust comparisons against
5276 an out-of-range constant. */
5277 if (!CONST_INT_P (*op1)
5278 || const_ok_for_arm (INTVAL (*op1))
5279 || const_ok_for_arm (- INTVAL (*op1)))
5280 return;
5281
5282 i = INTVAL (*op1);
5283
5284 switch (*code)
5285 {
5286 case EQ:
5287 case NE:
5288 return;
5289
5290 case GT:
5291 case LE:
5292 if (i != maxval
5293 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5294 {
5295 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5296 *code = *code == GT ? GE : LT;
5297 return;
5298 }
5299 break;
5300
5301 case GE:
5302 case LT:
5303 if (i != ~maxval
5304 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5305 {
5306 *op1 = GEN_INT (i - 1);
5307 *code = *code == GE ? GT : LE;
5308 return;
5309 }
5310 break;
5311
5312 case GTU:
5313 case LEU:
5314 if (i != ~((unsigned HOST_WIDE_INT) 0)
5315 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5316 {
5317 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5318 *code = *code == GTU ? GEU : LTU;
5319 return;
5320 }
5321 break;
5322
5323 case GEU:
5324 case LTU:
5325 if (i != 0
5326 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5327 {
5328 *op1 = GEN_INT (i - 1);
5329 *code = *code == GEU ? GTU : LEU;
5330 return;
5331 }
5332 break;
5333
5334 default:
5335 gcc_unreachable ();
5336 }
5337 }
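
/* Purely as an illustration of the adjustment above: "if (x > 0xfff)"
   compares against a constant that is not a valid ARM immediate, but
   0x1000 is, so the comparison is rewritten as GE against 0x1000:

       cmp     r0, #4096
       bge     .Ltaken

   avoiding a separate load of 0xfff into a scratch register.  */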
5338
5339
5340 /* Define how to find the value returned by a function. */
5341
5342 static rtx
5343 arm_function_value(const_tree type, const_tree func,
5344 bool outgoing ATTRIBUTE_UNUSED)
5345 {
5346 machine_mode mode;
5347 int unsignedp ATTRIBUTE_UNUSED;
5348 rtx r ATTRIBUTE_UNUSED;
5349
5350 mode = TYPE_MODE (type);
5351
5352 if (TARGET_AAPCS_BASED)
5353 return aapcs_allocate_return_reg (mode, type, func);
5354
5355 /* Promote integer types. */
5356 if (INTEGRAL_TYPE_P (type))
5357 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5358
5359 /* Promote small structs returned in a register to full-word size
5360 for big-endian AAPCS. */
5361 if (arm_return_in_msb (type))
5362 {
5363 HOST_WIDE_INT size = int_size_in_bytes (type);
5364 if (size % UNITS_PER_WORD != 0)
5365 {
5366 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5367 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5368 }
5369 }
5370
5371 return arm_libcall_value_1 (mode);
5372 }
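
/* As a sketch of the big-endian promotion above: on an armeb AAPCS
   target a 3-byte struct returned by value has its size rounded up to
   4, so the full SImode r0 is used, with the value placed towards the
   most significant end as arm_return_in_msb requires.  */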
5373
5374 /* libcall hashtable helpers. */
5375
5376 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5377 {
5378 static inline hashval_t hash (const rtx_def *);
5379 static inline bool equal (const rtx_def *, const rtx_def *);
5380 static inline void remove (rtx_def *);
5381 };
5382
5383 inline bool
5384 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5385 {
5386 return rtx_equal_p (p1, p2);
5387 }
5388
5389 inline hashval_t
5390 libcall_hasher::hash (const rtx_def *p1)
5391 {
5392 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5393 }
5394
5395 typedef hash_table<libcall_hasher> libcall_table_type;
5396
5397 static void
5398 add_libcall (libcall_table_type *htab, rtx libcall)
5399 {
5400 *htab->find_slot (libcall, INSERT) = libcall;
5401 }
5402
5403 static bool
5404 arm_libcall_uses_aapcs_base (const_rtx libcall)
5405 {
5406 static bool init_done = false;
5407 static libcall_table_type *libcall_htab = NULL;
5408
5409 if (!init_done)
5410 {
5411 init_done = true;
5412
5413 libcall_htab = new libcall_table_type (31);
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5422
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5431
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5442 add_libcall (libcall_htab,
5443 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5444 add_libcall (libcall_htab,
5445 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5446 add_libcall (libcall_htab,
5447 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5448
5449 /* Values from double-precision helper functions are returned in core
5450 registers if the selected core only supports single-precision
5451 arithmetic, even if we are using the hard-float ABI. The same is
5452 true for single-precision helpers, but we will never be using the
5453 hard-float ABI on a CPU which doesn't support single-precision
5454 operations in hardware. */
5455 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5456 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5457 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5458 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5466 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5467 SFmode));
5468 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5469 DFmode));
5470 add_libcall (libcall_htab,
5471 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5472 }
5473
5474 return libcall && libcall_htab->find (libcall) != NULL;
5475 }
5476
5477 static rtx
5478 arm_libcall_value_1 (machine_mode mode)
5479 {
5480 if (TARGET_AAPCS_BASED)
5481 return aapcs_libcall_value (mode);
5482 else if (TARGET_IWMMXT_ABI
5483 && arm_vector_mode_supported_p (mode))
5484 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5485 else
5486 return gen_rtx_REG (mode, ARG_REGISTER (1));
5487 }
5488
5489 /* Define how to find the value returned by a library function
5490 assuming the value has mode MODE. */
5491
5492 static rtx
5493 arm_libcall_value (machine_mode mode, const_rtx libcall)
5494 {
5495 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5496 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5497 {
5498 /* The following libcalls return their result in integer registers,
5499 even though they return a floating point value. */
5500 if (arm_libcall_uses_aapcs_base (libcall))
5501 return gen_rtx_REG (mode, ARG_REGISTER (1));
5502
5503 }
5504
5505 return arm_libcall_value_1 (mode);
5506 }
5507
5508 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5509
5510 static bool
5511 arm_function_value_regno_p (const unsigned int regno)
5512 {
5513 if (regno == ARG_REGISTER (1)
5514 || (TARGET_32BIT
5515 && TARGET_AAPCS_BASED
5516 && TARGET_HARD_FLOAT
5517 && regno == FIRST_VFP_REGNUM)
5518 || (TARGET_IWMMXT_ABI
5519 && regno == FIRST_IWMMXT_REGNUM))
5520 return true;
5521
5522 return false;
5523 }
5524
5525 /* Determine the amount of memory needed to store the possible return
5526 registers of an untyped call. */
5527 int
5528 arm_apply_result_size (void)
5529 {
5530 int size = 16;
5531
5532 if (TARGET_32BIT)
5533 {
5534 if (TARGET_HARD_FLOAT_ABI)
5535 size += 32;
5536 if (TARGET_IWMMXT_ABI)
5537 size += 8;
5538 }
5539
5540 return size;
5541 }
5542
5543 /* Decide whether TYPE should be returned in memory (true)
5544 or in a register (false). FNTYPE is the type of the function making
5545 the call. */
5546 static bool
5547 arm_return_in_memory (const_tree type, const_tree fntype)
5548 {
5549 HOST_WIDE_INT size;
5550
5551 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5552
5553 if (TARGET_AAPCS_BASED)
5554 {
5555 /* Simple, non-aggregate types (i.e. not including vectors and
5556 complex types) are always returned in a register (or registers).
5557 We don't care about which register here, so we can short-cut
5558 some of the detail. */
5559 if (!AGGREGATE_TYPE_P (type)
5560 && TREE_CODE (type) != VECTOR_TYPE
5561 && TREE_CODE (type) != COMPLEX_TYPE)
5562 return false;
5563
5564 /* Any return value that is no larger than one word can be
5565 returned in r0. */
5566 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5567 return false;
5568
5569 /* Check any available co-processors to see if they accept the
5570 type as a register candidate (VFP, for example, can return
5571 some aggregates in consecutive registers). These aren't
5572 available if the call is variadic. */
5573 if (aapcs_select_return_coproc (type, fntype) >= 0)
5574 return false;
5575
5576 /* Vector values should be returned using ARM registers, not
5577 memory (unless they're over 16 bytes, which will break since
5578 we only have four call-clobbered registers to play with). */
5579 if (TREE_CODE (type) == VECTOR_TYPE)
5580 return (size < 0 || size > (4 * UNITS_PER_WORD));
5581
5582 /* The rest go in memory. */
5583 return true;
5584 }
5585
5586 if (TREE_CODE (type) == VECTOR_TYPE)
5587 return (size < 0 || size > (4 * UNITS_PER_WORD));
5588
5589 if (!AGGREGATE_TYPE_P (type)
5590 && (TREE_CODE (type) != VECTOR_TYPE))
5591 /* All simple types are returned in registers. */
5592 return false;
5593
5594 if (arm_abi != ARM_ABI_APCS)
5595 {
5596 /* ATPCS and later return aggregate types in memory only if they are
5597 larger than a word (or are variable size). */
5598 return (size < 0 || size > UNITS_PER_WORD);
5599 }
5600
5601 /* For the arm-wince targets we choose to be compatible with Microsoft's
5602 ARM and Thumb compilers, which always return aggregates in memory. */
5603 #ifndef ARM_WINCE
5604 /* All structures/unions bigger than one word are returned in memory.
5605 Also catch the case where int_size_in_bytes returns -1. In this case
5606 the aggregate is either huge or of variable size, and in either case
5607 we will want to return it via memory and not in a register. */
5608 if (size < 0 || size > UNITS_PER_WORD)
5609 return true;
5610
5611 if (TREE_CODE (type) == RECORD_TYPE)
5612 {
5613 tree field;
5614
5615 /* For a struct the APCS says that we only return in a register
5616 if the type is 'integer like' and every addressable element
5617 has an offset of zero. For practical purposes this means
5618 that the structure can have at most one non bit-field element
5619 and that this element must be the first one in the structure. */
5620
5621 /* Find the first field, ignoring non FIELD_DECL things which will
5622 have been created by C++. */
5623 for (field = TYPE_FIELDS (type);
5624 field && TREE_CODE (field) != FIELD_DECL;
5625 field = DECL_CHAIN (field))
5626 continue;
5627
5628 if (field == NULL)
5629 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5630
5631 /* Check that the first field is valid for returning in a register. */
5632
5633 /* ... Floats are not allowed */
5634 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5635 return true;
5636
5637 /* ... Aggregates that are not themselves valid for returning in
5638 a register are not allowed. */
5639 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5640 return true;
5641
5642 /* Now check the remaining fields, if any. Only bitfields are allowed,
5643 since they are not addressable. */
5644 for (field = DECL_CHAIN (field);
5645 field;
5646 field = DECL_CHAIN (field))
5647 {
5648 if (TREE_CODE (field) != FIELD_DECL)
5649 continue;
5650
5651 if (!DECL_BIT_FIELD_TYPE (field))
5652 return true;
5653 }
5654
5655 return false;
5656 }
5657
5658 if (TREE_CODE (type) == UNION_TYPE)
5659 {
5660 tree field;
5661
5662 /* Unions can be returned in registers if every element is
5663 integral, or can be returned in an integer register. */
5664 for (field = TYPE_FIELDS (type);
5665 field;
5666 field = DECL_CHAIN (field))
5667 {
5668 if (TREE_CODE (field) != FIELD_DECL)
5669 continue;
5670
5671 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5672 return true;
5673
5674 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5675 return true;
5676 }
5677
5678 return false;
5679 }
5680 #endif /* not ARM_WINCE */
5681
5682 /* Return all other types in memory. */
5683 return true;
5684 }
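
/* Rough examples of the APCS 'integer like' rule above (they do not
   apply to AAPCS, where any aggregate of at most one word is returned
   in registers):

     struct a { int x; };         -- single integer member: register
     struct b { float f; };       -- first member is a float: memory
     struct c { short lo, hi; };  -- 'hi' is addressable at a non-zero
                                     offset and not a bit-field: memory  */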
5685
5686 const struct pcs_attribute_arg
5687 {
5688 const char *arg;
5689 enum arm_pcs value;
5690 } pcs_attribute_args[] =
5691 {
5692 {"aapcs", ARM_PCS_AAPCS},
5693 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5694 #if 0
5695 /* We could recognize these, but changes would be needed elsewhere
5696 * to implement them. */
5697 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5698 {"atpcs", ARM_PCS_ATPCS},
5699 {"apcs", ARM_PCS_APCS},
5700 #endif
5701 {NULL, ARM_PCS_UNKNOWN}
5702 };
5703
5704 static enum arm_pcs
5705 arm_pcs_from_attribute (tree attr)
5706 {
5707 const struct pcs_attribute_arg *ptr;
5708 const char *arg;
5709
5710 /* Get the value of the argument. */
5711 if (TREE_VALUE (attr) == NULL_TREE
5712 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5713 return ARM_PCS_UNKNOWN;
5714
5715 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5716
5717 /* Check it against the list of known arguments. */
5718 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5719 if (streq (arg, ptr->arg))
5720 return ptr->value;
5721
5722 /* An unrecognized PCS variant. */
5723 return ARM_PCS_UNKNOWN;
5724 }
5725
5726 /* Get the PCS variant to use for this call. TYPE is the function's type
5727 specification, DECL is the specific declaration. DECL may be null if
5728 the call could be indirect or if this is a library call. */
5729 static enum arm_pcs
5730 arm_get_pcs_model (const_tree type, const_tree decl)
5731 {
5732 bool user_convention = false;
5733 enum arm_pcs user_pcs = arm_pcs_default;
5734 tree attr;
5735
5736 gcc_assert (type);
5737
5738 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5739 if (attr)
5740 {
5741 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5742 user_convention = true;
5743 }
5744
5745 if (TARGET_AAPCS_BASED)
5746 {
5747 /* Detect varargs functions. These always use the base rules
5748 (no argument is ever a candidate for a co-processor
5749 register). */
5750 bool base_rules = stdarg_p (type);
5751
5752 if (user_convention)
5753 {
5754 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5755 sorry ("non-AAPCS derived PCS variant");
5756 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5757 error ("variadic functions must use the base AAPCS variant");
5758 }
5759
5760 if (base_rules)
5761 return ARM_PCS_AAPCS;
5762 else if (user_convention)
5763 return user_pcs;
5764 else if (decl && flag_unit_at_a_time)
5765 {
5766 /* Local functions never leak outside this compilation unit,
5767 so we are free to use whatever conventions are
5768 appropriate. */
5769 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5770 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5771 if (i && i->local)
5772 return ARM_PCS_AAPCS_LOCAL;
5773 }
5774 }
5775 else if (user_convention && user_pcs != arm_pcs_default)
5776 sorry ("PCS variant");
5777
5778 /* For everything else we use the target's default. */
5779 return arm_pcs_default;
5780 }
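
/* For example, on a softfp multilib the attribute below (one of the
   names accepted by arm_pcs_from_attribute) selects the VFP variant
   for this one signature, while everything else in the translation
   unit keeps arm_pcs_default:

     double dot (const double *, const double *, int)
       __attribute__ ((pcs ("aapcs-vfp")));  */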
5781
5782
5783 static void
5784 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5785 const_tree fntype ATTRIBUTE_UNUSED,
5786 rtx libcall ATTRIBUTE_UNUSED,
5787 const_tree fndecl ATTRIBUTE_UNUSED)
5788 {
5789 /* Record the unallocated VFP registers. */
5790 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5791 pcum->aapcs_vfp_reg_alloc = 0;
5792 }
5793
5794 /* Walk down the type tree of TYPE counting consecutive base elements.
5795 If *MODEP is VOIDmode, then set it to the first valid floating point
5796 type. If a non-floating point type is found, or if a floating point
5797 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5798 otherwise return the count in the sub-tree. */
5799 static int
5800 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5801 {
5802 machine_mode mode;
5803 HOST_WIDE_INT size;
5804
5805 switch (TREE_CODE (type))
5806 {
5807 case REAL_TYPE:
5808 mode = TYPE_MODE (type);
5809 if (mode != DFmode && mode != SFmode && mode != HFmode)
5810 return -1;
5811
5812 if (*modep == VOIDmode)
5813 *modep = mode;
5814
5815 if (*modep == mode)
5816 return 1;
5817
5818 break;
5819
5820 case COMPLEX_TYPE:
5821 mode = TYPE_MODE (TREE_TYPE (type));
5822 if (mode != DFmode && mode != SFmode)
5823 return -1;
5824
5825 if (*modep == VOIDmode)
5826 *modep = mode;
5827
5828 if (*modep == mode)
5829 return 2;
5830
5831 break;
5832
5833 case VECTOR_TYPE:
5834 /* Use V2SImode and V4SImode as representatives of all 64-bit
5835 and 128-bit vector types, whether or not those modes are
5836 supported with the present options. */
5837 size = int_size_in_bytes (type);
5838 switch (size)
5839 {
5840 case 8:
5841 mode = V2SImode;
5842 break;
5843 case 16:
5844 mode = V4SImode;
5845 break;
5846 default:
5847 return -1;
5848 }
5849
5850 if (*modep == VOIDmode)
5851 *modep = mode;
5852
5853 /* Vector modes are considered to be opaque: two vectors are
5854 equivalent for the purposes of being homogeneous aggregates
5855 if they are the same size. */
5856 if (*modep == mode)
5857 return 1;
5858
5859 break;
5860
5861 case ARRAY_TYPE:
5862 {
5863 int count;
5864 tree index = TYPE_DOMAIN (type);
5865
5866 /* Can't handle incomplete types nor sizes that are not
5867 fixed. */
5868 if (!COMPLETE_TYPE_P (type)
5869 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5870 return -1;
5871
5872 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5873 if (count == -1
5874 || !index
5875 || !TYPE_MAX_VALUE (index)
5876 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5877 || !TYPE_MIN_VALUE (index)
5878 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5879 || count < 0)
5880 return -1;
5881
5882 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5883 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5884
5885 /* There must be no padding. */
5886 if (wi::to_wide (TYPE_SIZE (type))
5887 != count * GET_MODE_BITSIZE (*modep))
5888 return -1;
5889
5890 return count;
5891 }
5892
5893 case RECORD_TYPE:
5894 {
5895 int count = 0;
5896 int sub_count;
5897 tree field;
5898
5899 /* Can't handle incomplete types nor sizes that are not
5900 fixed. */
5901 if (!COMPLETE_TYPE_P (type)
5902 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5903 return -1;
5904
5905 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5906 {
5907 if (TREE_CODE (field) != FIELD_DECL)
5908 continue;
5909
5910 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5911 if (sub_count < 0)
5912 return -1;
5913 count += sub_count;
5914 }
5915
5916 /* There must be no padding. */
5917 if (wi::to_wide (TYPE_SIZE (type))
5918 != count * GET_MODE_BITSIZE (*modep))
5919 return -1;
5920
5921 return count;
5922 }
5923
5924 case UNION_TYPE:
5925 case QUAL_UNION_TYPE:
5926 {
5927 /* These aren't very interesting except in a degenerate case. */
5928 int count = 0;
5929 int sub_count;
5930 tree field;
5931
5932 /* Can't handle incomplete types nor sizes that are not
5933 fixed. */
5934 if (!COMPLETE_TYPE_P (type)
5935 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5936 return -1;
5937
5938 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5939 {
5940 if (TREE_CODE (field) != FIELD_DECL)
5941 continue;
5942
5943 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5944 if (sub_count < 0)
5945 return -1;
5946 count = count > sub_count ? count : sub_count;
5947 }
5948
5949 /* There must be no padding. */
5950 if (wi::to_wide (TYPE_SIZE (type))
5951 != count * GET_MODE_BITSIZE (*modep))
5952 return -1;
5953
5954 return count;
5955 }
5956
5957 default:
5958 break;
5959 }
5960
5961 return -1;
5962 }
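
/* Some illustrative inputs for the walk above, in AAPCS "homogeneous
   aggregate" terms:

     struct s1 { double a, b; };      -- base mode DFmode, count 2
     struct s2 { float v[4]; };       -- base mode SFmode, count 4
     struct s3 { float f; int i; };   -- mixed element types: -1
     _Complex double                  -- base mode DFmode, count 2  */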
5963
5964 /* Return true if PCS_VARIANT should use VFP registers. */
5965 static bool
5966 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5967 {
5968 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5969 {
5970 static bool seen_thumb1_vfp = false;
5971
5972 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5973 {
5974 sorry ("Thumb-1 hard-float VFP ABI");
5975 /* sorry() is not immediately fatal, so only display this once. */
5976 seen_thumb1_vfp = true;
5977 }
5978
5979 return true;
5980 }
5981
5982 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5983 return false;
5984
5985 return (TARGET_32BIT && TARGET_HARD_FLOAT
5986 && (TARGET_VFP_DOUBLE || !is_double));
5987 }
5988
5989 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5990 suitable for passing or returning in VFP registers for the PCS
5991 variant selected. If it is, then *BASE_MODE is updated to contain
5992 a machine mode describing each element of the argument's type and
5993 *COUNT to hold the number of such elements. */
5994 static bool
5995 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5996 machine_mode mode, const_tree type,
5997 machine_mode *base_mode, int *count)
5998 {
5999 machine_mode new_mode = VOIDmode;
6000
6001 /* If we have the type information, prefer that to working things
6002 out from the mode. */
6003 if (type)
6004 {
6005 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6006
6007 if (ag_count > 0 && ag_count <= 4)
6008 *count = ag_count;
6009 else
6010 return false;
6011 }
6012 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6013 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6014 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6015 {
6016 *count = 1;
6017 new_mode = mode;
6018 }
6019 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6020 {
6021 *count = 2;
6022 new_mode = (mode == DCmode ? DFmode : SFmode);
6023 }
6024 else
6025 return false;
6026
6027
6028 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6029 return false;
6030
6031 *base_mode = new_mode;
6032 return true;
6033 }
6034
6035 static bool
6036 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6037 machine_mode mode, const_tree type)
6038 {
6039 int count ATTRIBUTE_UNUSED;
6040 machine_mode ag_mode ATTRIBUTE_UNUSED;
6041
6042 if (!use_vfp_abi (pcs_variant, false))
6043 return false;
6044 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6045 &ag_mode, &count);
6046 }
6047
6048 static bool
6049 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6050 const_tree type)
6051 {
6052 if (!use_vfp_abi (pcum->pcs_variant, false))
6053 return false;
6054
6055 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6056 &pcum->aapcs_vfp_rmode,
6057 &pcum->aapcs_vfp_rcount);
6058 }
6059
6060 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6061 for the behaviour of this function. */
6062
6063 static bool
6064 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6065 const_tree type ATTRIBUTE_UNUSED)
6066 {
6067 int rmode_size
6068 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6069 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6070 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6071 int regno;
6072
6073 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6074 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6075 {
6076 pcum->aapcs_vfp_reg_alloc = mask << regno;
6077 if (mode == BLKmode
6078 || (mode == TImode && ! TARGET_NEON)
6079 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6080 {
6081 int i;
6082 int rcount = pcum->aapcs_vfp_rcount;
6083 int rshift = shift;
6084 machine_mode rmode = pcum->aapcs_vfp_rmode;
6085 rtx par;
6086 if (!TARGET_NEON)
6087 {
6088 /* Avoid using unsupported vector modes. */
6089 if (rmode == V2SImode)
6090 rmode = DImode;
6091 else if (rmode == V4SImode)
6092 {
6093 rmode = DImode;
6094 rcount *= 2;
6095 rshift /= 2;
6096 }
6097 }
6098 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6099 for (i = 0; i < rcount; i++)
6100 {
6101 rtx tmp = gen_rtx_REG (rmode,
6102 FIRST_VFP_REGNUM + regno + i * rshift);
6103 tmp = gen_rtx_EXPR_LIST
6104 (VOIDmode, tmp,
6105 GEN_INT (i * GET_MODE_SIZE (rmode)));
6106 XVECEXP (par, 0, i) = tmp;
6107 }
6108
6109 pcum->aapcs_reg = par;
6110 }
6111 else
6112 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6113 return true;
6114 }
6115 return false;
6116 }
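
/* For instance, a struct of three floats arrives here with
   aapcs_vfp_rmode == SFmode and aapcs_vfp_rcount == 3, so the loop
   above claims s0-s2 (mask 0x7) and, because the struct itself is
   BLKmode, hands back a PARALLEL of three SFmode registers at byte
   offsets 0, 4 and 8.  */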
6117
6118 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6119 comment there for the behaviour of this function. */
6120
6121 static rtx
6122 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6123 machine_mode mode,
6124 const_tree type ATTRIBUTE_UNUSED)
6125 {
6126 if (!use_vfp_abi (pcs_variant, false))
6127 return NULL;
6128
6129 if (mode == BLKmode
6130 || (GET_MODE_CLASS (mode) == MODE_INT
6131 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6132 && !TARGET_NEON))
6133 {
6134 int count;
6135 machine_mode ag_mode;
6136 int i;
6137 rtx par;
6138 int shift;
6139
6140 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6141 &ag_mode, &count);
6142
6143 if (!TARGET_NEON)
6144 {
6145 if (ag_mode == V2SImode)
6146 ag_mode = DImode;
6147 else if (ag_mode == V4SImode)
6148 {
6149 ag_mode = DImode;
6150 count *= 2;
6151 }
6152 }
6153 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6154 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6155 for (i = 0; i < count; i++)
6156 {
6157 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6158 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6159 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6160 XVECEXP (par, 0, i) = tmp;
6161 }
6162
6163 return par;
6164 }
6165
6166 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6167 }
6168
6169 static void
6170 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6171 machine_mode mode ATTRIBUTE_UNUSED,
6172 const_tree type ATTRIBUTE_UNUSED)
6173 {
6174 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6175 pcum->aapcs_vfp_reg_alloc = 0;
6176 return;
6177 }
6178
6179 #define AAPCS_CP(X) \
6180 { \
6181 aapcs_ ## X ## _cum_init, \
6182 aapcs_ ## X ## _is_call_candidate, \
6183 aapcs_ ## X ## _allocate, \
6184 aapcs_ ## X ## _is_return_candidate, \
6185 aapcs_ ## X ## _allocate_return_reg, \
6186 aapcs_ ## X ## _advance \
6187 }
6188
6189 /* Table of co-processors that can be used to pass arguments in
6190 registers. Ideally no argument should be a candidate for more than
6191 one co-processor table entry, but the table is processed in order
6192 and stops after the first match. If that entry then fails to put
6193 the argument into a co-processor register, the argument will go on
6194 the stack. */
6195 static struct
6196 {
6197 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6198 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6199
6200 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6201 BLKmode) is a candidate for this co-processor's registers; this
6202 function should ignore any position-dependent state in
6203 CUMULATIVE_ARGS and only use call-type dependent information. */
6204 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6205
6206 /* Return true if the argument does get a co-processor register; it
6207 should set aapcs_reg to an RTX of the register allocated as is
6208 required for a return from FUNCTION_ARG. */
6209 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6210
6211 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6212 be returned in this co-processor's registers. */
6213 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6214
6215 /* Allocate and return an RTX element to hold the return type of a call. This
6216 routine must not fail and will only be called if is_return_candidate
6217 returned true with the same parameters. */
6218 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6219
6220 /* Finish processing this argument and prepare to start processing
6221 the next one. */
6222 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6223 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6224 {
6225 AAPCS_CP(vfp)
6226 };
6227
6228 #undef AAPCS_CP
6229
6230 static int
6231 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6232 const_tree type)
6233 {
6234 int i;
6235
6236 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6237 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6238 return i;
6239
6240 return -1;
6241 }
6242
6243 static int
6244 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6245 {
6246 /* We aren't passed a decl, so we can't check that a call is local.
6247 However, it isn't clear that that would be a win anyway, since it
6248 might limit some tail-calling opportunities. */
6249 enum arm_pcs pcs_variant;
6250
6251 if (fntype)
6252 {
6253 const_tree fndecl = NULL_TREE;
6254
6255 if (TREE_CODE (fntype) == FUNCTION_DECL)
6256 {
6257 fndecl = fntype;
6258 fntype = TREE_TYPE (fntype);
6259 }
6260
6261 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6262 }
6263 else
6264 pcs_variant = arm_pcs_default;
6265
6266 if (pcs_variant != ARM_PCS_AAPCS)
6267 {
6268 int i;
6269
6270 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6271 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6272 TYPE_MODE (type),
6273 type))
6274 return i;
6275 }
6276 return -1;
6277 }
6278
6279 static rtx
6280 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6281 const_tree fntype)
6282 {
6283 /* We aren't passed a decl, so we can't check that a call is local.
6284 However, it isn't clear that that would be a win anyway, since it
6285 might limit some tail-calling opportunities. */
6286 enum arm_pcs pcs_variant;
6287 int unsignedp ATTRIBUTE_UNUSED;
6288
6289 if (fntype)
6290 {
6291 const_tree fndecl = NULL_TREE;
6292
6293 if (TREE_CODE (fntype) == FUNCTION_DECL)
6294 {
6295 fndecl = fntype;
6296 fntype = TREE_TYPE (fntype);
6297 }
6298
6299 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6300 }
6301 else
6302 pcs_variant = arm_pcs_default;
6303
6304 /* Promote integer types. */
6305 if (type && INTEGRAL_TYPE_P (type))
6306 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6307
6308 if (pcs_variant != ARM_PCS_AAPCS)
6309 {
6310 int i;
6311
6312 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6313 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6314 type))
6315 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6316 mode, type);
6317 }
6318
6319 /* Promote small structs returned in a register to full-word size
6320 for big-endian AAPCS. */
6321 if (type && arm_return_in_msb (type))
6322 {
6323 HOST_WIDE_INT size = int_size_in_bytes (type);
6324 if (size % UNITS_PER_WORD != 0)
6325 {
6326 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6327 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6328 }
6329 }
6330
6331 return gen_rtx_REG (mode, R0_REGNUM);
6332 }
6333
6334 static rtx
6335 aapcs_libcall_value (machine_mode mode)
6336 {
6337 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6338 && GET_MODE_SIZE (mode) <= 4)
6339 mode = SImode;
6340
6341 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6342 }
6343
6344 /* Lay out a function argument using the AAPCS rules. The rule
6345 numbers referred to here are those in the AAPCS. */
6346 static void
6347 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6348 const_tree type, bool named)
6349 {
6350 int nregs, nregs2;
6351 int ncrn;
6352
6353 /* We only need to do this once per argument. */
6354 if (pcum->aapcs_arg_processed)
6355 return;
6356
6357 pcum->aapcs_arg_processed = true;
6358
6359 /* Special case: if named is false then we are handling an incoming
6360 anonymous argument which is on the stack. */
6361 if (!named)
6362 return;
6363
6364 /* Is this a potential co-processor register candidate? */
6365 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6366 {
6367 int slot = aapcs_select_call_coproc (pcum, mode, type);
6368 pcum->aapcs_cprc_slot = slot;
6369
6370 /* We don't have to apply any of the rules from part B of the
6371 preparation phase, these are handled elsewhere in the
6372 compiler. */
6373
6374 if (slot >= 0)
6375 {
6376 /* A Co-processor register candidate goes either in its own
6377 class of registers or on the stack. */
6378 if (!pcum->aapcs_cprc_failed[slot])
6379 {
6380 /* C1.cp - Try to allocate the argument to co-processor
6381 registers. */
6382 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6383 return;
6384
6385 /* C2.cp - Put the argument on the stack and note that we
6386 can't assign any more candidates in this slot. We also
6387 need to note that we have allocated stack space, so that
6388 we won't later try to split a non-cprc candidate between
6389 core registers and the stack. */
6390 pcum->aapcs_cprc_failed[slot] = true;
6391 pcum->can_split = false;
6392 }
6393
6394 /* We didn't get a register, so this argument goes on the
6395 stack. */
6396 gcc_assert (pcum->can_split == false);
6397 return;
6398 }
6399 }
6400
6401 /* C3 - For double-word aligned arguments, round the NCRN up to the
6402 next even number. */
6403 ncrn = pcum->aapcs_ncrn;
6404 if (ncrn & 1)
6405 {
6406 int res = arm_needs_doubleword_align (mode, type);
6407 /* Only warn during RTL expansion of call stmts, otherwise we would
6408 warn e.g. during gimplification even on functions that will be
6409 always inlined, and we'd warn multiple times. Don't warn when
6410 called in expand_function_start either, as we warn instead in
6411 arm_function_arg_boundary in that case. */
6412 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6413 inform (input_location, "parameter passing for argument of type "
6414 "%qT changed in GCC 7.1", type);
6415 else if (res > 0)
6416 ncrn++;
6417 }
6418
6419 nregs = ARM_NUM_REGS2 (mode, type);
6420
6421 /* Sigh, this test should really assert that nregs > 0, but a GCC
6422 extension allows empty structs and then gives them empty size; it
6423 then allows such a structure to be passed by value. For some of
6424 the code below we have to pretend that such an argument has
6425 non-zero size so that we 'locate' it correctly either in
6426 registers or on the stack. */
6427 gcc_assert (nregs >= 0);
6428
6429 nregs2 = nregs ? nregs : 1;
6430
6431 /* C4 - Argument fits entirely in core registers. */
6432 if (ncrn + nregs2 <= NUM_ARG_REGS)
6433 {
6434 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6435 pcum->aapcs_next_ncrn = ncrn + nregs;
6436 return;
6437 }
6438
6439 /* C5 - Some core registers left and there are no arguments already
6440 on the stack: split this argument between the remaining core
6441 registers and the stack. */
6442 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6443 {
6444 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6445 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6446 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6447 return;
6448 }
6449
6450 /* C6 - NCRN is set to 4. */
6451 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6452
6453 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6454 return;
6455 }
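
/* A worked example of rules C3-C7 above for the base variant, e.g.
   void f (int a, long long b, int c):

     a -> r0                                  (NCRN 0 -> 1)
     b -> C3 rounds NCRN up to 2, so r2/r3    (NCRN -> 4)
     c -> no core registers left, C6/C7: on the stack

   Note that r1 stays unused; it is not back-filled.  */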
6456
6457 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6458 for a call to a function whose data type is FNTYPE.
6459 For a library call, FNTYPE is NULL. */
6460 void
6461 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6462 rtx libname,
6463 tree fndecl ATTRIBUTE_UNUSED)
6464 {
6465 /* Long call handling. */
6466 if (fntype)
6467 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6468 else
6469 pcum->pcs_variant = arm_pcs_default;
6470
6471 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6472 {
6473 if (arm_libcall_uses_aapcs_base (libname))
6474 pcum->pcs_variant = ARM_PCS_AAPCS;
6475
6476 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6477 pcum->aapcs_reg = NULL_RTX;
6478 pcum->aapcs_partial = 0;
6479 pcum->aapcs_arg_processed = false;
6480 pcum->aapcs_cprc_slot = -1;
6481 pcum->can_split = true;
6482
6483 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6484 {
6485 int i;
6486
6487 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6488 {
6489 pcum->aapcs_cprc_failed[i] = false;
6490 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6491 }
6492 }
6493 return;
6494 }
6495
6496 /* Legacy ABIs */
6497
6498 /* On the ARM, the offset starts at 0. */
6499 pcum->nregs = 0;
6500 pcum->iwmmxt_nregs = 0;
6501 pcum->can_split = true;
6502
6503 /* Varargs vectors are treated the same as long long.
6504 named_count avoids having to change the way arm handles 'named' */
6505 pcum->named_count = 0;
6506 pcum->nargs = 0;
6507
6508 if (TARGET_REALLY_IWMMXT && fntype)
6509 {
6510 tree fn_arg;
6511
6512 for (fn_arg = TYPE_ARG_TYPES (fntype);
6513 fn_arg;
6514 fn_arg = TREE_CHAIN (fn_arg))
6515 pcum->named_count += 1;
6516
6517 if (! pcum->named_count)
6518 pcum->named_count = INT_MAX;
6519 }
6520 }
6521
6522 /* Return 1 if double word alignment is required for argument passing.
6523 Return -1 if double word alignment used to be required for argument
6524 passing before the PR77728 ABI fix, but is not required anymore.
6525 Return 0 if double word alignment is not required and wasn't required
6526 before either. */
6527 static int
6528 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6529 {
6530 if (!type)
6531 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6532
6533 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6534 if (!AGGREGATE_TYPE_P (type))
6535 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6536
6537 /* Array types: Use member alignment of element type. */
6538 if (TREE_CODE (type) == ARRAY_TYPE)
6539 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6540
6541 int ret = 0;
6542 /* Record/aggregate types: Use greatest member alignment of any member. */
6543 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6544 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6545 {
6546 if (TREE_CODE (field) == FIELD_DECL)
6547 return 1;
6548 else
6549 /* Before the PR77728 fix, we also incorrectly considered
6550 other aggregate fields, such as VAR_DECLs, TYPE_DECLs etc.
6551 Make sure we can warn about that with -Wpsabi. */
6552 ret = -1;
6553 }
6554
6555 return ret;
6556 }
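
/* E.g. 'int' (32-bit alignment) gives 0, while 'long long', 'double'
   and struct { long long x; } (64-bit alignment) give 1.  A C++ struct
   whose only doubleword-aligned entry in TYPE_FIELDS is a static data
   member (a VAR_DECL) gives -1, which callers turn into a -Wpsabi note
   rather than into extra alignment.  */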
6557
6558
6559 /* Determine where to put an argument to a function.
6560 Value is zero to push the argument on the stack,
6561 or a hard register in which to store the argument.
6562
6563 MODE is the argument's machine mode.
6564 TYPE is the data type of the argument (as a tree).
6565 This is null for libcalls where that information may
6566 not be available.
6567 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6568 the preceding args and about the function being called.
6569 NAMED is nonzero if this argument is a named parameter
6570 (otherwise it is an extra parameter matching an ellipsis).
6571
6572 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6573 other arguments are passed on the stack. If (NAMED == 0) (which happens
6574 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6575 defined), say it is passed on the stack (function_prologue will
6576 indeed make it pass on the stack if necessary). */
6577
6578 static rtx
6579 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6580 const_tree type, bool named)
6581 {
6582 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6583 int nregs;
6584
6585 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6586 a call insn (op3 of a call_value insn). */
6587 if (mode == VOIDmode)
6588 return const0_rtx;
6589
6590 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6591 {
6592 aapcs_layout_arg (pcum, mode, type, named);
6593 return pcum->aapcs_reg;
6594 }
6595
6596 /* Varargs vectors are treated the same as long long.
6597 named_count avoids having to change the way arm handles 'named' */
6598 if (TARGET_IWMMXT_ABI
6599 && arm_vector_mode_supported_p (mode)
6600 && pcum->named_count > pcum->nargs + 1)
6601 {
6602 if (pcum->iwmmxt_nregs <= 9)
6603 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6604 else
6605 {
6606 pcum->can_split = false;
6607 return NULL_RTX;
6608 }
6609 }
6610
6611 /* Put doubleword aligned quantities in even register pairs. */
6612 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6613 {
6614 int res = arm_needs_doubleword_align (mode, type);
6615 if (res < 0 && warn_psabi)
6616 inform (input_location, "parameter passing for argument of type "
6617 "%qT changed in GCC 7.1", type);
6618 else if (res > 0)
6619 pcum->nregs++;
6620 }
6621
6622 /* Only allow splitting an arg between regs and memory if all preceding
6623 args were allocated to regs. For args passed by reference we only count
6624 the reference pointer. */
6625 if (pcum->can_split)
6626 nregs = 1;
6627 else
6628 nregs = ARM_NUM_REGS2 (mode, type);
6629
6630 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6631 return NULL_RTX;
6632
6633 return gen_rtx_REG (mode, pcum->nregs);
6634 }
6635
6636 static unsigned int
6637 arm_function_arg_boundary (machine_mode mode, const_tree type)
6638 {
6639 if (!ARM_DOUBLEWORD_ALIGN)
6640 return PARM_BOUNDARY;
6641
6642 int res = arm_needs_doubleword_align (mode, type);
6643 if (res < 0 && warn_psabi)
6644 inform (input_location, "parameter passing for argument of type %qT "
6645 "changed in GCC 7.1", type);
6646
6647 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6648 }
6649
6650 static int
6651 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6652 tree type, bool named)
6653 {
6654 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6655 int nregs = pcum->nregs;
6656
6657 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6658 {
6659 aapcs_layout_arg (pcum, mode, type, named);
6660 return pcum->aapcs_partial;
6661 }
6662
6663 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6664 return 0;
6665
6666 if (NUM_ARG_REGS > nregs
6667 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6668 && pcum->can_split)
6669 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6670
6671 return 0;
6672 }
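
/* Sketch of the splitting case handled above for the legacy ABIs:
   after three int arguments in r0-r2, a long long argument (two words)
   gets r3 plus four bytes of stack, so this returns 4.  Under AAPCS
   the equivalent split (rule C5) is reported via pcum->aapcs_partial
   for, say, an 8-byte struct with only word alignment.  */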
6673
6674 /* Update the data in PCUM to advance over an argument
6675 of mode MODE and data type TYPE.
6676 (TYPE is null for libcalls where that information may not be available.) */
6677
6678 static void
6679 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6680 const_tree type, bool named)
6681 {
6682 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6683
6684 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6685 {
6686 aapcs_layout_arg (pcum, mode, type, named);
6687
6688 if (pcum->aapcs_cprc_slot >= 0)
6689 {
6690 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6691 type);
6692 pcum->aapcs_cprc_slot = -1;
6693 }
6694
6695 /* Generic stuff. */
6696 pcum->aapcs_arg_processed = false;
6697 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6698 pcum->aapcs_reg = NULL_RTX;
6699 pcum->aapcs_partial = 0;
6700 }
6701 else
6702 {
6703 pcum->nargs += 1;
6704 if (arm_vector_mode_supported_p (mode)
6705 && pcum->named_count > pcum->nargs
6706 && TARGET_IWMMXT_ABI)
6707 pcum->iwmmxt_nregs += 1;
6708 else
6709 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6710 }
6711 }
6712
6713 /* Variable sized types are passed by reference. This is a GCC
6714 extension to the ARM ABI. */
6715
6716 static bool
6717 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6718 machine_mode mode ATTRIBUTE_UNUSED,
6719 const_tree type, bool named ATTRIBUTE_UNUSED)
6720 {
6721 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6722 }
6723 \f
6724 /* Encode the current state of the #pragma [no_]long_calls. */
6725 typedef enum
6726 {
6727 OFF, /* No #pragma [no_]long_calls is in effect. */
6728 LONG, /* #pragma long_calls is in effect. */
6729 SHORT /* #pragma no_long_calls is in effect. */
6730 } arm_pragma_enum;
6731
6732 static arm_pragma_enum arm_pragma_long_calls = OFF;
6733
6734 void
6735 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6736 {
6737 arm_pragma_long_calls = LONG;
6738 }
6739
6740 void
6741 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6742 {
6743 arm_pragma_long_calls = SHORT;
6744 }
6745
6746 void
6747 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6748 {
6749 arm_pragma_long_calls = OFF;
6750 }
6751 \f
6752 /* Handle an attribute requiring a FUNCTION_DECL;
6753 arguments as in struct attribute_spec.handler. */
6754 static tree
6755 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6756 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6757 {
6758 if (TREE_CODE (*node) != FUNCTION_DECL)
6759 {
6760 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6761 name);
6762 *no_add_attrs = true;
6763 }
6764
6765 return NULL_TREE;
6766 }
6767
6768 /* Handle an "interrupt" or "isr" attribute;
6769 arguments as in struct attribute_spec.handler. */
6770 static tree
6771 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6772 bool *no_add_attrs)
6773 {
6774 if (DECL_P (*node))
6775 {
6776 if (TREE_CODE (*node) != FUNCTION_DECL)
6777 {
6778 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6779 name);
6780 *no_add_attrs = true;
6781 }
6782 /* FIXME: the argument if any is checked for type attributes;
6783 should it be checked for decl ones? */
6784 }
6785 else
6786 {
6787 if (TREE_CODE (*node) == FUNCTION_TYPE
6788 || TREE_CODE (*node) == METHOD_TYPE)
6789 {
6790 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6791 {
6792 warning (OPT_Wattributes, "%qE attribute ignored",
6793 name);
6794 *no_add_attrs = true;
6795 }
6796 }
6797 else if (TREE_CODE (*node) == POINTER_TYPE
6798 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6799 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6800 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6801 {
6802 *node = build_variant_type_copy (*node);
6803 TREE_TYPE (*node) = build_type_attribute_variant
6804 (TREE_TYPE (*node),
6805 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6806 *no_add_attrs = true;
6807 }
6808 else
6809 {
6810 /* Possibly pass this attribute on from the type to a decl. */
6811 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6812 | (int) ATTR_FLAG_FUNCTION_NEXT
6813 | (int) ATTR_FLAG_ARRAY_NEXT))
6814 {
6815 *no_add_attrs = true;
6816 return tree_cons (name, args, NULL_TREE);
6817 }
6818 else
6819 {
6820 warning (OPT_Wattributes, "%qE attribute ignored",
6821 name);
6822 }
6823 }
6824 }
6825
6826 return NULL_TREE;
6827 }
6828
6829 /* Handle a "pcs" attribute; arguments as in struct
6830 attribute_spec.handler. */
6831 static tree
6832 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6833 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6834 {
6835 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6836 {
6837 warning (OPT_Wattributes, "%qE attribute ignored", name);
6838 *no_add_attrs = true;
6839 }
6840 return NULL_TREE;
6841 }
6842
6843 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6844 /* Handle the "notshared" attribute. This attribute is another way of
6845 requesting hidden visibility. ARM's compiler supports
6846 "__declspec(notshared)"; we support the same thing via an
6847 attribute. */
6848
6849 static tree
6850 arm_handle_notshared_attribute (tree *node,
6851 tree name ATTRIBUTE_UNUSED,
6852 tree args ATTRIBUTE_UNUSED,
6853 int flags ATTRIBUTE_UNUSED,
6854 bool *no_add_attrs)
6855 {
6856 tree decl = TYPE_NAME (*node);
6857
6858 if (decl)
6859 {
6860 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6861 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6862 *no_add_attrs = false;
6863 }
6864 return NULL_TREE;
6865 }
6866 #endif
6867
6868 /* This function returns true if a function with declaration FNDECL and type
6869 FNTYPE uses the stack to pass arguments or to return its value, and false
6870 otherwise. This is used for functions with the attributes
6871 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6872 diagnostic messages if the stack is used. NAME is the name of the attribute
6873 used. */
6874
6875 static bool
6876 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6877 {
6878 function_args_iterator args_iter;
6879 CUMULATIVE_ARGS args_so_far_v;
6880 cumulative_args_t args_so_far;
6881 bool first_param = true;
6882 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6883
6884 /* Error out if any argument is passed on the stack. */
6885 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6886 args_so_far = pack_cumulative_args (&args_so_far_v);
6887 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6888 {
6889 rtx arg_rtx;
6890 machine_mode arg_mode = TYPE_MODE (arg_type);
6891
6892 prev_arg_type = arg_type;
6893 if (VOID_TYPE_P (arg_type))
6894 continue;
6895
6896 if (!first_param)
6897 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6898 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6899 if (!arg_rtx
6900 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6901 {
6902 error ("%qE attribute not available to functions with arguments "
6903 "passed on the stack", name);
6904 return true;
6905 }
6906 first_param = false;
6907 }
6908
6909 /* Error out for variadic functions since we cannot control how many
6910 arguments will be passed and thus the stack could be used. stdarg_p () is
6911 not used for this check, to avoid walking the argument list twice. */
6912 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6913 {
6914 error ("%qE attribute not available to functions with variable number "
6915 "of arguments", name);
6916 return true;
6917 }
6918
6919 /* Error out if return value is passed on the stack. */
6920 ret_type = TREE_TYPE (fntype);
6921 if (arm_return_in_memory (ret_type, fntype))
6922 {
6923 error ("%qE attribute not available to functions that return value on "
6924 "the stack", name);
6925 return true;
6926 }
6927 return false;
6928 }
6929
6930 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6931 function will check whether the attribute is allowed here and will add the
6932 attribute to the function declaration tree or otherwise issue a warning. */
6933
6934 static tree
6935 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6936 tree /* args */,
6937 int /* flags */,
6938 bool *no_add_attrs)
6939 {
6940 tree fndecl;
6941
6942 if (!use_cmse)
6943 {
6944 *no_add_attrs = true;
6945 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6946 name);
6947 return NULL_TREE;
6948 }
6949
6950 /* Ignore the attribute for anything that is not a function declaration. */
6951 if (TREE_CODE (*node) != FUNCTION_DECL)
6952 {
6953 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6954 name);
6955 *no_add_attrs = true;
6956 return NULL_TREE;
6957 }
6958
6959 fndecl = *node;
6960
6961 /* Warn for static linkage functions. */
6962 if (!TREE_PUBLIC (fndecl))
6963 {
6964 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6965 "with static linkage", name);
6966 *no_add_attrs = true;
6967 return NULL_TREE;
6968 }
6969
6970 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6971 TREE_TYPE (fndecl));
6972 return NULL_TREE;
6973 }
6974
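/* Illustrative sketch added by the editor (not part of the original file):
   a secure-state entry function as checked by the handler above.  The
   translation unit must be compiled with -mcmse, the function must have
   external linkage, and none of its arguments or its return value may be
   passed on the stack (see cmse_func_args_or_return_in_stack).  */
#if 0
int __attribute__ ((cmse_nonsecure_entry)) secure_service (int request);
#endif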
6975
6976 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6977 function will check whether the attribute is allowed here and will add the
6978 attribute to the function type tree or otherwise issue a diagnostic. The
6979 reason we check this at declaration time is to only allow the use of the
6980 attribute with declarations of function pointers and not function
6981 declarations. This function checks NODE is of the expected type and issues
6982 diagnostics otherwise using NAME. If it is not of the expected type
6983 *NO_ADD_ATTRS will be set to true. */
6984
6985 static tree
6986 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6987 tree /* args */,
6988 int /* flags */,
6989 bool *no_add_attrs)
6990 {
6991 tree decl = NULL_TREE, fntype = NULL_TREE;
6992 tree type;
6993
6994 if (!use_cmse)
6995 {
6996 *no_add_attrs = true;
6997 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6998 name);
6999 return NULL_TREE;
7000 }
7001
7002 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7003 {
7004 decl = *node;
7005 fntype = TREE_TYPE (decl);
7006 }
7007
7008 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7009 fntype = TREE_TYPE (fntype);
7010
7011 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7012 {
7013 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7014 "function pointer", name);
7015 *no_add_attrs = true;
7016 return NULL_TREE;
7017 }
7018
7019 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7020
7021 if (*no_add_attrs)
7022 return NULL_TREE;
7023
7024 /* Prevent trees being shared among function types with and without
7025 cmse_nonsecure_call attribute. */
7026 type = TREE_TYPE (decl);
7027
7028 type = build_distinct_type_copy (type);
7029 TREE_TYPE (decl) = type;
7030 fntype = type;
7031
7032 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7033 {
7034 type = fntype;
7035 fntype = TREE_TYPE (fntype);
7036 fntype = build_distinct_type_copy (fntype);
7037 TREE_TYPE (type) = fntype;
7038 }
7039
7040 /* Construct a type attribute and add it to the function type. */
7041 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7042 TYPE_ATTRIBUTES (fntype));
7043 TYPE_ATTRIBUTES (fntype) = attrs;
7044 return NULL_TREE;
7045 }
7046
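/* Illustrative sketch added by the editor (not part of the original file):
   the handler above only accepts "cmse_nonsecure_call" on declarations
   whose type is (a pointer to) a function type, e.g. a function-pointer
   variable or a typedef, again requiring -mcmse.  */
#if 0
void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
#endif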
7047 /* Return 0 if the attributes for two types are incompatible, 1 if they
7048 are compatible, and 2 if they are nearly compatible (which causes a
7049 warning to be generated). */
7050 static int
7051 arm_comp_type_attributes (const_tree type1, const_tree type2)
7052 {
7053 int l1, l2, s1, s2;
7054
7055 /* Check for mismatch of non-default calling convention. */
7056 if (TREE_CODE (type1) != FUNCTION_TYPE)
7057 return 1;
7058
7059 /* Check for mismatched call attributes. */
7060 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7061 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7062 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7063 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7064
7065 /* Only bother to check if an attribute is defined. */
7066 if (l1 | l2 | s1 | s2)
7067 {
7068 /* If one type has an attribute, the other must have the same attribute. */
7069 if ((l1 != l2) || (s1 != s2))
7070 return 0;
7071
7072 /* Disallow mixed attributes. */
7073 if ((l1 & s2) || (l2 & s1))
7074 return 0;
7075 }
7076
7077 /* Check for mismatched ISR attribute. */
7078 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7079 if (! l1)
7080 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7081 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7082 if (! l2)
7083 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7084 if (l1 != l2)
7085 return 0;
7086
7087 l1 = lookup_attribute ("cmse_nonsecure_call",
7088 TYPE_ATTRIBUTES (type1)) != NULL;
7089 l2 = lookup_attribute ("cmse_nonsecure_call",
7090 TYPE_ATTRIBUTES (type2)) != NULL;
7091
7092 if (l1 != l2)
7093 return 0;
7094
7095 return 1;
7096 }
7097
7098 /* Assigns default attributes to newly defined type. This is used to
7099 set short_call/long_call attributes for function types of
7100 functions defined inside corresponding #pragma scopes. */
7101 static void
7102 arm_set_default_type_attributes (tree type)
7103 {
7104 /* Add __attribute__ ((long_call)) to all functions, when
7105 inside #pragma long_calls or __attribute__ ((short_call)),
7106 when inside #pragma no_long_calls. */
7107 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7108 {
7109 tree type_attr_list, attr_name;
7110 type_attr_list = TYPE_ATTRIBUTES (type);
7111
7112 if (arm_pragma_long_calls == LONG)
7113 attr_name = get_identifier ("long_call");
7114 else if (arm_pragma_long_calls == SHORT)
7115 attr_name = get_identifier ("short_call");
7116 else
7117 return;
7118
7119 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7120 TYPE_ATTRIBUTES (type) = type_attr_list;
7121 }
7122 }
7123 \f
7124 /* Return true if DECL is known to be linked into section SECTION. */
7125
7126 static bool
7127 arm_function_in_section_p (tree decl, section *section)
7128 {
7129 /* We can only be certain about the prevailing symbol definition. */
7130 if (!decl_binds_to_current_def_p (decl))
7131 return false;
7132
7133 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7134 if (!DECL_SECTION_NAME (decl))
7135 {
7136 /* Make sure that we will not create a unique section for DECL. */
7137 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7138 return false;
7139 }
7140
7141 return function_section (decl) == section;
7142 }
7143
7144 /* Return nonzero if a 32-bit "long_call" should be generated for
7145 a call from the current function to DECL. We generate a long_call
7146 if the function:
7147
7148 a. has an __attribute__ ((long_call))
7149 or b. is within the scope of a #pragma long_calls
7150 or c. the -mlong-calls command line switch has been specified
7151
7152 However we do not generate a long call if the function:
7153
7154 d. has an __attribute__ ((short_call))
7155 or e. is inside the scope of a #pragma no_long_calls
7156 or f. is defined in the same section as the current function. */
7157
7158 bool
7159 arm_is_long_call_p (tree decl)
7160 {
7161 tree attrs;
7162
7163 if (!decl)
7164 return TARGET_LONG_CALLS;
7165
7166 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7167 if (lookup_attribute ("short_call", attrs))
7168 return false;
7169
7170 /* For "f", be conservative, and only cater for cases in which the
7171 whole of the current function is placed in the same section. */
7172 if (!flag_reorder_blocks_and_partition
7173 && TREE_CODE (decl) == FUNCTION_DECL
7174 && arm_function_in_section_p (decl, current_function_section ()))
7175 return false;
7176
7177 if (lookup_attribute ("long_call", attrs))
7178 return true;
7179
7180 return TARGET_LONG_CALLS;
7181 }
7182
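/* Illustrative sketch added by the editor (not part of the original file):
   the source-level controls that feed the decision above.  -mlong-calls
   sets TARGET_LONG_CALLS, the pragmas set arm_pragma_long_calls (see
   arm_set_default_type_attributes), and the attributes override both.  */
#if 0
void far_away (void) __attribute__ ((long_call));   /* case a.  */
void close_by (void) __attribute__ ((short_call));  /* case d.  */
#pragma long_calls
void declared_far (void);                           /* case b.  */
#pragma long_calls_off
#endif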
7183 /* Return nonzero if it is ok to make a tail-call to DECL. */
7184 static bool
7185 arm_function_ok_for_sibcall (tree decl, tree exp)
7186 {
7187 unsigned long func_type;
7188
7189 if (cfun->machine->sibcall_blocked)
7190 return false;
7191
7192 /* Never tailcall something if we are generating code for Thumb-1. */
7193 if (TARGET_THUMB1)
7194 return false;
7195
7196 /* The PIC register is live on entry to VxWorks PLT entries, so we
7197 must make the call before restoring the PIC register. */
7198 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7199 return false;
7200
7201 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7202 may be used both as target of the call and base register for restoring
7203 the VFP registers */
7204 if (TARGET_APCS_FRAME && TARGET_ARM
7205 && TARGET_HARD_FLOAT
7206 && decl && arm_is_long_call_p (decl))
7207 return false;
7208
7209 /* If we are interworking and the function is not declared static
7210 then we can't tail-call it unless we know that it exists in this
7211 compilation unit (since it might be a Thumb routine). */
7212 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7213 && !TREE_ASM_WRITTEN (decl))
7214 return false;
7215
7216 func_type = arm_current_func_type ();
7217 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7218 if (IS_INTERRUPT (func_type))
7219 return false;
7220
7221 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7222 generated for entry functions themselves. */
7223 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7224 return false;
7225
7226 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7227 this would complicate matters for later code generation. */
7228 if (TREE_CODE (exp) == CALL_EXPR)
7229 {
7230 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7231 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7232 return false;
7233 }
7234
7235 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7236 {
7237 /* Check that the return value locations are the same. For
7238 example that we aren't returning a value from the sibling in
7239 a VFP register but then need to transfer it to a core
7240 register. */
7241 rtx a, b;
7242 tree decl_or_type = decl;
7243
7244 /* If it is an indirect function pointer, get the function type. */
7245 if (!decl)
7246 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7247
7248 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7249 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7250 cfun->decl, false);
7251 if (!rtx_equal_p (a, b))
7252 return false;
7253 }
7254
7255 /* Never tailcall if function may be called with a misaligned SP. */
7256 if (IS_STACKALIGN (func_type))
7257 return false;
7258
7259 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7260 references should become a NOP. Don't convert such calls into
7261 sibling calls. */
7262 if (TARGET_AAPCS_BASED
7263 && arm_abi == ARM_ABI_AAPCS
7264 && decl
7265 && DECL_WEAK (decl))
7266 return false;
7267
7268 /* We cannot do a tailcall for an indirect call by descriptor if all the
7269 argument registers are used because the only register left to load the
7270 address is IP and it will already contain the static chain. */
7271 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7272 {
7273 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7274 CUMULATIVE_ARGS cum;
7275 cumulative_args_t cum_v;
7276
7277 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7278 cum_v = pack_cumulative_args (&cum);
7279
7280 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7281 {
7282 tree type = TREE_VALUE (t);
7283 if (!VOID_TYPE_P (type))
7284 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7285 }
7286
7287 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7288 return false;
7289 }
7290
7291 /* Everything else is ok. */
7292 return true;
7293 }
7294
7295 \f
7296 /* Addressing mode support functions. */
7297
7298 /* Return nonzero if X is a legitimate immediate operand when compiling
7299 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7300 int
7301 legitimate_pic_operand_p (rtx x)
7302 {
7303 if (GET_CODE (x) == SYMBOL_REF
7304 || (GET_CODE (x) == CONST
7305 && GET_CODE (XEXP (x, 0)) == PLUS
7306 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7307 return 0;
7308
7309 return 1;
7310 }
7311
7312 /* Record that the current function needs a PIC register. Initialize
7313 cfun->machine->pic_reg if we have not already done so. */
7314
7315 static void
7316 require_pic_register (void)
7317 {
7318 /* A lot of the logic here is made obscure by the fact that this
7319 routine gets called as part of the rtx cost estimation process.
7320 We don't want those calls to affect any assumptions about the real
7321 function; and further, we can't call entry_of_function() until we
7322 start the real expansion process. */
7323 if (!crtl->uses_pic_offset_table)
7324 {
7325 gcc_assert (can_create_pseudo_p ());
7326 if (arm_pic_register != INVALID_REGNUM
7327 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7328 {
7329 if (!cfun->machine->pic_reg)
7330 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7331
7332 /* Play games to avoid marking the function as needing pic
7333 if we are being called as part of the cost-estimation
7334 process. */
7335 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7336 crtl->uses_pic_offset_table = 1;
7337 }
7338 else
7339 {
7340 rtx_insn *seq, *insn;
7341
7342 if (!cfun->machine->pic_reg)
7343 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7344
7345 /* Play games to avoid marking the function as needing pic
7346 if we are being called as part of the cost-estimation
7347 process. */
7348 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7349 {
7350 crtl->uses_pic_offset_table = 1;
7351 start_sequence ();
7352
7353 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7354 && arm_pic_register > LAST_LO_REGNUM)
7355 emit_move_insn (cfun->machine->pic_reg,
7356 gen_rtx_REG (Pmode, arm_pic_register));
7357 else
7358 arm_load_pic_register (0UL);
7359
7360 seq = get_insns ();
7361 end_sequence ();
7362
7363 for (insn = seq; insn; insn = NEXT_INSN (insn))
7364 if (INSN_P (insn))
7365 INSN_LOCATION (insn) = prologue_location;
7366
7367 /* We can be called during expansion of PHI nodes, where
7368 we can't yet emit instructions directly in the final
7369 insn stream. Queue the insns on the entry edge, they will
7370 be committed after everything else is expanded. */
7371 insert_insn_on_edge (seq,
7372 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7373 }
7374 }
7375 }
7376 }
7377
7378 rtx
7379 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7380 {
7381 if (GET_CODE (orig) == SYMBOL_REF
7382 || GET_CODE (orig) == LABEL_REF)
7383 {
7384 if (reg == 0)
7385 {
7386 gcc_assert (can_create_pseudo_p ());
7387 reg = gen_reg_rtx (Pmode);
7388 }
7389
7390 /* VxWorks does not impose a fixed gap between segments; the run-time
7391 gap can be different from the object-file gap. We therefore can't
7392 use GOTOFF unless we are absolutely sure that the symbol is in the
7393 same segment as the GOT. Unfortunately, the flexibility of linker
7394 scripts means that we can't be sure of that in general, so assume
7395 that GOTOFF is never valid on VxWorks. */
7396 /* References to weak symbols cannot be resolved locally: they
7397 may be overridden by a non-weak definition at link time. */
7398 rtx_insn *insn;
7399 if ((GET_CODE (orig) == LABEL_REF
7400 || (GET_CODE (orig) == SYMBOL_REF
7401 && SYMBOL_REF_LOCAL_P (orig)
7402 && (SYMBOL_REF_DECL (orig)
7403 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7404 && NEED_GOT_RELOC
7405 && arm_pic_data_is_text_relative)
7406 insn = arm_pic_static_addr (orig, reg);
7407 else
7408 {
7409 rtx pat;
7410 rtx mem;
7411
7412 /* If this function doesn't have a pic register, create one now. */
7413 require_pic_register ();
7414
7415 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7416
7417 /* Make the MEM as close to a constant as possible. */
7418 mem = SET_SRC (pat);
7419 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7420 MEM_READONLY_P (mem) = 1;
7421 MEM_NOTRAP_P (mem) = 1;
7422
7423 insn = emit_insn (pat);
7424 }
7425
7426 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7427 by loop. */
7428 set_unique_reg_note (insn, REG_EQUAL, orig);
7429
7430 return reg;
7431 }
7432 else if (GET_CODE (orig) == CONST)
7433 {
7434 rtx base, offset;
7435
7436 if (GET_CODE (XEXP (orig, 0)) == PLUS
7437 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7438 return orig;
7439
7440 /* Handle the case where we have: const (UNSPEC_TLS). */
7441 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7442 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7443 return orig;
7444
7445 /* Handle the case where we have:
7446 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7447 CONST_INT. */
7448 if (GET_CODE (XEXP (orig, 0)) == PLUS
7449 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7450 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7451 {
7452 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7453 return orig;
7454 }
7455
7456 if (reg == 0)
7457 {
7458 gcc_assert (can_create_pseudo_p ());
7459 reg = gen_reg_rtx (Pmode);
7460 }
7461
7462 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7463
7464 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7465 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7466 base == reg ? 0 : reg);
7467
7468 if (CONST_INT_P (offset))
7469 {
7470 /* The base register doesn't really matter, we only want to
7471 test the index for the appropriate mode. */
7472 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7473 {
7474 gcc_assert (can_create_pseudo_p ());
7475 offset = force_reg (Pmode, offset);
7476 }
7477
7478 if (CONST_INT_P (offset))
7479 return plus_constant (Pmode, base, INTVAL (offset));
7480 }
7481
7482 if (GET_MODE_SIZE (mode) > 4
7483 && (GET_MODE_CLASS (mode) == MODE_INT
7484 || TARGET_SOFT_FLOAT))
7485 {
7486 emit_insn (gen_addsi3 (reg, base, offset));
7487 return reg;
7488 }
7489
7490 return gen_rtx_PLUS (Pmode, base, offset);
7491 }
7492
7493 return orig;
7494 }
7495
7496
7497 /* Find a spare register to use during the prolog of a function. */
7498
7499 static int
7500 thumb_find_work_register (unsigned long pushed_regs_mask)
7501 {
7502 int reg;
7503
7504 /* Check the argument registers first as these are call-used. The
7505 register allocation order means that sometimes r3 might be used
7506 but earlier argument registers might not, so check them all. */
7507 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7508 if (!df_regs_ever_live_p (reg))
7509 return reg;
7510
7511 /* Before going on to check the call-saved registers we can try a couple
7512 more ways of deducing that r3 is available. The first is when we are
7513 pushing anonymous arguments onto the stack and we have less than 4
7514 registers worth of fixed arguments(*). In this case r3 will be part of
7515 the variable argument list and so we can be sure that it will be
7516 pushed right at the start of the function. Hence it will be available
7517 for the rest of the prologue.
7518 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7519 if (cfun->machine->uses_anonymous_args
7520 && crtl->args.pretend_args_size > 0)
7521 return LAST_ARG_REGNUM;
7522
7523 /* The other case is when we have fixed arguments but less than 4 registers
7524 worth. In this case r3 might be used in the body of the function, but
7525 it is not being used to convey an argument into the function. In theory
7526 we could just check crtl->args.size to see how many bytes are
7527 being passed in argument registers, but it seems that it is unreliable.
7528 Sometimes it will have the value 0 when in fact arguments are being
7529 passed. (See testcase execute/20021111-1.c for an example). So we also
7530 check the args_info.nregs field as well. The problem with this field is
7531 that it makes no allowances for arguments that are passed to the
7532 function but which are not used. Hence we could miss an opportunity
7533 when a function has an unused argument in r3. But it is better to be
7534 safe than to be sorry. */
7535 if (! cfun->machine->uses_anonymous_args
7536 && crtl->args.size >= 0
7537 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7538 && (TARGET_AAPCS_BASED
7539 ? crtl->args.info.aapcs_ncrn < 4
7540 : crtl->args.info.nregs < 4))
7541 return LAST_ARG_REGNUM;
7542
7543 /* Otherwise look for a call-saved register that is going to be pushed. */
7544 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7545 if (pushed_regs_mask & (1 << reg))
7546 return reg;
7547
7548 if (TARGET_THUMB2)
7549 {
7550 /* Thumb-2 can use high regs. */
7551 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7552 if (pushed_regs_mask & (1 << reg))
7553 return reg;
7554 }
7555 /* Something went wrong - thumb_compute_save_reg_mask()
7556 should have arranged for a suitable register to be pushed. */
7557 gcc_unreachable ();
7558 }
7559
7560 static GTY(()) int pic_labelno;
7561
7562 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7563 low register. */
7564
7565 void
7566 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7567 {
7568 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7569
7570 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7571 return;
7572
7573 gcc_assert (flag_pic);
7574
7575 pic_reg = cfun->machine->pic_reg;
7576 if (TARGET_VXWORKS_RTP)
7577 {
7578 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7579 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7580 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7581
7582 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7583
7584 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7585 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7586 }
7587 else
7588 {
7589 /* We use an UNSPEC rather than a LABEL_REF because this label
7590 never appears in the code stream. */
7591
7592 labelno = GEN_INT (pic_labelno++);
7593 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7594 l1 = gen_rtx_CONST (VOIDmode, l1);
7595
7596 /* On the ARM the PC register contains 'dot + 8' at the time of the
7597 addition, on the Thumb it is 'dot + 4'. */
7598 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7599 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7600 UNSPEC_GOTSYM_OFF);
7601 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7602
7603 if (TARGET_32BIT)
7604 {
7605 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7606 }
7607 else /* TARGET_THUMB1 */
7608 {
7609 if (arm_pic_register != INVALID_REGNUM
7610 && REGNO (pic_reg) > LAST_LO_REGNUM)
7611 {
7612 /* We will have pushed the pic register, so we should always be
7613 able to find a work register. */
7614 pic_tmp = gen_rtx_REG (SImode,
7615 thumb_find_work_register (saved_regs));
7616 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7617 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7618 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7619 }
7620 else if (arm_pic_register != INVALID_REGNUM
7621 && arm_pic_register > LAST_LO_REGNUM
7622 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7623 {
7624 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7625 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7626 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7627 }
7628 else
7629 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7630 }
7631 }
7632
7633 /* Need to emit this whether or not we obey regdecls,
7634 since setjmp/longjmp can cause life info to screw up. */
7635 emit_use (pic_reg);
7636 }
7637
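/* Editor's note (not part of the original file): schematically, for ARM
   state the code above expands to something like

	ldr	rPIC, .LPIC_offset
   .LPICn:
	add	rPIC, pc, rPIC
	...
   .LPIC_offset:
	.word	_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   where the "+ 8" matches the 'dot + 8' PC bias mentioned above ('+ 4' in
   Thumb state).  The label names and register shown are illustrative.  */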
7638 /* Generate code to load the address of a static var when flag_pic is set. */
7639 static rtx_insn *
7640 arm_pic_static_addr (rtx orig, rtx reg)
7641 {
7642 rtx l1, labelno, offset_rtx;
7643
7644 gcc_assert (flag_pic);
7645
7646 /* We use an UNSPEC rather than a LABEL_REF because this label
7647 never appears in the code stream. */
7648 labelno = GEN_INT (pic_labelno++);
7649 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7650 l1 = gen_rtx_CONST (VOIDmode, l1);
7651
7652 /* On the ARM the PC register contains 'dot + 8' at the time of the
7653 addition, on the Thumb it is 'dot + 4'. */
7654 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7655 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7656 UNSPEC_SYMBOL_OFFSET);
7657 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7658
7659 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7660 }
7661
7662 /* Return nonzero if X is valid as an ARM state addressing register. */
7663 static int
7664 arm_address_register_rtx_p (rtx x, int strict_p)
7665 {
7666 int regno;
7667
7668 if (!REG_P (x))
7669 return 0;
7670
7671 regno = REGNO (x);
7672
7673 if (strict_p)
7674 return ARM_REGNO_OK_FOR_BASE_P (regno);
7675
7676 return (regno <= LAST_ARM_REGNUM
7677 || regno >= FIRST_PSEUDO_REGISTER
7678 || regno == FRAME_POINTER_REGNUM
7679 || regno == ARG_POINTER_REGNUM);
7680 }
7681
7682 /* Return TRUE if this rtx is the difference of a symbol and a label,
7683 and will reduce to a PC-relative relocation in the object file.
7684 Expressions like this can be left alone when generating PIC, rather
7685 than forced through the GOT. */
7686 static int
7687 pcrel_constant_p (rtx x)
7688 {
7689 if (GET_CODE (x) == MINUS)
7690 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7691
7692 return FALSE;
7693 }
7694
7695 /* Return true if X will surely end up in an index register after next
7696 splitting pass. */
7697 static bool
7698 will_be_in_index_register (const_rtx x)
7699 {
7700 /* arm.md: calculate_pic_address will split this into a register. */
7701 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7702 }
7703
7704 /* Return nonzero if X is a valid ARM state address operand. */
7705 int
7706 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7707 int strict_p)
7708 {
7709 bool use_ldrd;
7710 enum rtx_code code = GET_CODE (x);
7711
7712 if (arm_address_register_rtx_p (x, strict_p))
7713 return 1;
7714
7715 use_ldrd = (TARGET_LDRD
7716 && (mode == DImode || mode == DFmode));
7717
7718 if (code == POST_INC || code == PRE_DEC
7719 || ((code == PRE_INC || code == POST_DEC)
7720 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7721 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7722
7723 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7724 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7725 && GET_CODE (XEXP (x, 1)) == PLUS
7726 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7727 {
7728 rtx addend = XEXP (XEXP (x, 1), 1);
7729
7730 /* Don't allow ldrd post increment by register because it's hard
7731 to fixup invalid register choices. */
7732 if (use_ldrd
7733 && GET_CODE (x) == POST_MODIFY
7734 && REG_P (addend))
7735 return 0;
7736
7737 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7738 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7739 }
7740
7741 /* After reload constants split into minipools will have addresses
7742 from a LABEL_REF. */
7743 else if (reload_completed
7744 && (code == LABEL_REF
7745 || (code == CONST
7746 && GET_CODE (XEXP (x, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7748 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7749 return 1;
7750
7751 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7752 return 0;
7753
7754 else if (code == PLUS)
7755 {
7756 rtx xop0 = XEXP (x, 0);
7757 rtx xop1 = XEXP (x, 1);
7758
7759 return ((arm_address_register_rtx_p (xop0, strict_p)
7760 && ((CONST_INT_P (xop1)
7761 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7762 || (!strict_p && will_be_in_index_register (xop1))))
7763 || (arm_address_register_rtx_p (xop1, strict_p)
7764 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7765 }
7766
7767 #if 0
7768 /* Reload currently can't handle MINUS, so disable this for now */
7769 else if (GET_CODE (x) == MINUS)
7770 {
7771 rtx xop0 = XEXP (x, 0);
7772 rtx xop1 = XEXP (x, 1);
7773
7774 return (arm_address_register_rtx_p (xop0, strict_p)
7775 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7776 }
7777 #endif
7778
7779 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7780 && code == SYMBOL_REF
7781 && CONSTANT_POOL_ADDRESS_P (x)
7782 && ! (flag_pic
7783 && symbol_mentioned_p (get_pool_constant (x))
7784 && ! pcrel_constant_p (get_pool_constant (x))))
7785 return 1;
7786
7787 return 0;
7788 }
7789
7790 /* Return true if we can avoid creating a constant pool entry for x. */
7791 static bool
7792 can_avoid_literal_pool_for_label_p (rtx x)
7793 {
7794 /* Normally we can assign constant values to target registers without
7795 the help of a constant pool. But there are cases where we have to use a
7796 constant pool, for example:
7797 1) assigning a label to a register.
7798 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7799
7800 Constant pool access in format:
7801 (set (reg r0) (mem (symbol_ref (".LC0"))))
7802 will cause the use of literal pool (later in function arm_reorg).
7803 So here we mark such format as an invalid format, then the compiler
7804 will adjust it into:
7805 (set (reg r0) (symbol_ref (".LC0")))
7806 (set (reg r0) (mem (reg r0))).
7807 No extra register is required, and (mem (reg r0)) won't cause the use
7808 of literal pools. */
7809 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7810 && CONSTANT_POOL_ADDRESS_P (x))
7811 return 1;
7812 return 0;
7813 }
7814
7815
7816 /* Return nonzero if X is a valid Thumb-2 address operand. */
7817 static int
7818 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7819 {
7820 bool use_ldrd;
7821 enum rtx_code code = GET_CODE (x);
7822
7823 if (arm_address_register_rtx_p (x, strict_p))
7824 return 1;
7825
7826 use_ldrd = (TARGET_LDRD
7827 && (mode == DImode || mode == DFmode));
7828
7829 if (code == POST_INC || code == PRE_DEC
7830 || ((code == PRE_INC || code == POST_DEC)
7831 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7832 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7833
7834 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7835 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7836 && GET_CODE (XEXP (x, 1)) == PLUS
7837 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7838 {
7839 /* Thumb-2 only has autoincrement by constant. */
7840 rtx addend = XEXP (XEXP (x, 1), 1);
7841 HOST_WIDE_INT offset;
7842
7843 if (!CONST_INT_P (addend))
7844 return 0;
7845
7846 offset = INTVAL (addend);
7847 if (GET_MODE_SIZE (mode) <= 4)
7848 return (offset > -256 && offset < 256);
7849
7850 return (use_ldrd && offset > -1024 && offset < 1024
7851 && (offset & 3) == 0);
7852 }
7853
7854 /* After reload constants split into minipools will have addresses
7855 from a LABEL_REF. */
7856 else if (reload_completed
7857 && (code == LABEL_REF
7858 || (code == CONST
7859 && GET_CODE (XEXP (x, 0)) == PLUS
7860 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7861 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7862 return 1;
7863
7864 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7865 return 0;
7866
7867 else if (code == PLUS)
7868 {
7869 rtx xop0 = XEXP (x, 0);
7870 rtx xop1 = XEXP (x, 1);
7871
7872 return ((arm_address_register_rtx_p (xop0, strict_p)
7873 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7874 || (!strict_p && will_be_in_index_register (xop1))))
7875 || (arm_address_register_rtx_p (xop1, strict_p)
7876 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7877 }
7878
7879 else if (can_avoid_literal_pool_for_label_p (x))
7880 return 0;
7881
7882 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7883 && code == SYMBOL_REF
7884 && CONSTANT_POOL_ADDRESS_P (x)
7885 && ! (flag_pic
7886 && symbol_mentioned_p (get_pool_constant (x))
7887 && ! pcrel_constant_p (get_pool_constant (x))))
7888 return 1;
7889
7890 return 0;
7891 }
7892
7893 /* Return nonzero if INDEX is valid for an address index operand in
7894 ARM state. */
7895 static int
7896 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7897 int strict_p)
7898 {
7899 HOST_WIDE_INT range;
7900 enum rtx_code code = GET_CODE (index);
7901
7902 /* Standard coprocessor addressing modes. */
7903 if (TARGET_HARD_FLOAT
7904 && (mode == SFmode || mode == DFmode))
7905 return (code == CONST_INT && INTVAL (index) < 1024
7906 && INTVAL (index) > -1024
7907 && (INTVAL (index) & 3) == 0);
7908
7909 /* For quad modes, we restrict the constant offset to be slightly less
7910 than what the instruction format permits. We do this because for
7911 quad mode moves, we will actually decompose them into two separate
7912 double-mode reads or writes. INDEX must therefore be a valid
7913 (double-mode) offset and so should INDEX+8. */
7914 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7915 return (code == CONST_INT
7916 && INTVAL (index) < 1016
7917 && INTVAL (index) > -1024
7918 && (INTVAL (index) & 3) == 0);
7919
7920 /* We have no such constraint on double mode offsets, so we permit the
7921 full range of the instruction format. */
7922 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7923 return (code == CONST_INT
7924 && INTVAL (index) < 1024
7925 && INTVAL (index) > -1024
7926 && (INTVAL (index) & 3) == 0);
7927
7928 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7929 return (code == CONST_INT
7930 && INTVAL (index) < 1024
7931 && INTVAL (index) > -1024
7932 && (INTVAL (index) & 3) == 0);
7933
7934 if (arm_address_register_rtx_p (index, strict_p)
7935 && (GET_MODE_SIZE (mode) <= 4))
7936 return 1;
7937
7938 if (mode == DImode || mode == DFmode)
7939 {
7940 if (code == CONST_INT)
7941 {
7942 HOST_WIDE_INT val = INTVAL (index);
7943
7944 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7945 If vldr is selected it uses arm_coproc_mem_operand. */
7946 if (TARGET_LDRD)
7947 return val > -256 && val < 256;
7948 else
7949 return val > -4096 && val < 4092;
7950 }
7951
7952 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7953 }
7954
7955 if (GET_MODE_SIZE (mode) <= 4
7956 && ! (arm_arch4
7957 && (mode == HImode
7958 || mode == HFmode
7959 || (mode == QImode && outer == SIGN_EXTEND))))
7960 {
7961 if (code == MULT)
7962 {
7963 rtx xiop0 = XEXP (index, 0);
7964 rtx xiop1 = XEXP (index, 1);
7965
7966 return ((arm_address_register_rtx_p (xiop0, strict_p)
7967 && power_of_two_operand (xiop1, SImode))
7968 || (arm_address_register_rtx_p (xiop1, strict_p)
7969 && power_of_two_operand (xiop0, SImode)));
7970 }
7971 else if (code == LSHIFTRT || code == ASHIFTRT
7972 || code == ASHIFT || code == ROTATERT)
7973 {
7974 rtx op = XEXP (index, 1);
7975
7976 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7977 && CONST_INT_P (op)
7978 && INTVAL (op) > 0
7979 && INTVAL (op) <= 31);
7980 }
7981 }
7982
7983 /* For ARM v4 we may be doing a sign-extend operation during the
7984 load. */
7985 if (arm_arch4)
7986 {
7987 if (mode == HImode
7988 || mode == HFmode
7989 || (outer == SIGN_EXTEND && mode == QImode))
7990 range = 256;
7991 else
7992 range = 4096;
7993 }
7994 else
7995 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7996
7997 return (code == CONST_INT
7998 && INTVAL (index) < range
7999 && INTVAL (index) > -range);
8000 }
8001
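/* Editor's note (not part of the original file): a few concrete addresses
   accepted by arm_legitimate_index_p above for ARM state, assuming the
   base register is valid:

     ldr   r0, [r1, #4095]       @ SImode, |offset| < 4096
     ldrh  r0, [r1, #255]        @ HImode with arm_arch4, |offset| < 256
     ldr   r0, [r1, r2, lsl #3]  @ MULT/shift form, power-of-two scale

   The offsets shown are the extremes of the ranges tested above.  */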
8002 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8003 index operand. i.e. 1, 2, 4 or 8. */
8004 static bool
8005 thumb2_index_mul_operand (rtx op)
8006 {
8007 HOST_WIDE_INT val;
8008
8009 if (!CONST_INT_P (op))
8010 return false;
8011
8012 val = INTVAL (op);
8013 return (val == 1 || val == 2 || val == 4 || val == 8);
8014 }
8015
8016 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8017 static int
8018 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8019 {
8020 enum rtx_code code = GET_CODE (index);
8021
8022 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8023 /* Standard coprocessor addressing modes. */
8024 if (TARGET_HARD_FLOAT
8025 && (mode == SFmode || mode == DFmode))
8026 return (code == CONST_INT && INTVAL (index) < 1024
8027 /* Thumb-2 allows only > -256 index range for its core register
8028 load/stores. Since we allow SF/DF in core registers, we have
8029 to use the intersection between -256~4096 (core) and -1024~1024
8030 (coprocessor). */
8031 && INTVAL (index) > -256
8032 && (INTVAL (index) & 3) == 0);
8033
8034 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8035 {
8036 /* For DImode assume values will usually live in core regs
8037 and only allow LDRD addressing modes. */
8038 if (!TARGET_LDRD || mode != DImode)
8039 return (code == CONST_INT
8040 && INTVAL (index) < 1024
8041 && INTVAL (index) > -1024
8042 && (INTVAL (index) & 3) == 0);
8043 }
8044
8045 /* For quad modes, we restrict the constant offset to be slightly less
8046 than what the instruction format permits. We do this because for
8047 quad mode moves, we will actually decompose them into two separate
8048 double-mode reads or writes. INDEX must therefore be a valid
8049 (double-mode) offset and so should INDEX+8. */
8050 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8051 return (code == CONST_INT
8052 && INTVAL (index) < 1016
8053 && INTVAL (index) > -1024
8054 && (INTVAL (index) & 3) == 0);
8055
8056 /* We have no such constraint on double mode offsets, so we permit the
8057 full range of the instruction format. */
8058 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8059 return (code == CONST_INT
8060 && INTVAL (index) < 1024
8061 && INTVAL (index) > -1024
8062 && (INTVAL (index) & 3) == 0);
8063
8064 if (arm_address_register_rtx_p (index, strict_p)
8065 && (GET_MODE_SIZE (mode) <= 4))
8066 return 1;
8067
8068 if (mode == DImode || mode == DFmode)
8069 {
8070 if (code == CONST_INT)
8071 {
8072 HOST_WIDE_INT val = INTVAL (index);
8073 /* Thumb-2 ldrd only has reg+const addressing modes.
8074 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8075 If vldr is selected it uses arm_coproc_mem_operand. */
8076 if (TARGET_LDRD)
8077 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8078 else
8079 return IN_RANGE (val, -255, 4095 - 4);
8080 }
8081 else
8082 return 0;
8083 }
8084
8085 if (code == MULT)
8086 {
8087 rtx xiop0 = XEXP (index, 0);
8088 rtx xiop1 = XEXP (index, 1);
8089
8090 return ((arm_address_register_rtx_p (xiop0, strict_p)
8091 && thumb2_index_mul_operand (xiop1))
8092 || (arm_address_register_rtx_p (xiop1, strict_p)
8093 && thumb2_index_mul_operand (xiop0)));
8094 }
8095 else if (code == ASHIFT)
8096 {
8097 rtx op = XEXP (index, 1);
8098
8099 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8100 && CONST_INT_P (op)
8101 && INTVAL (op) > 0
8102 && INTVAL (op) <= 3);
8103 }
8104
8105 return (code == CONST_INT
8106 && INTVAL (index) < 4096
8107 && INTVAL (index) > -256);
8108 }
8109
8110 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8111 static int
8112 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8113 {
8114 int regno;
8115
8116 if (!REG_P (x))
8117 return 0;
8118
8119 regno = REGNO (x);
8120
8121 if (strict_p)
8122 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8123
8124 return (regno <= LAST_LO_REGNUM
8125 || regno > LAST_VIRTUAL_REGISTER
8126 || regno == FRAME_POINTER_REGNUM
8127 || (GET_MODE_SIZE (mode) >= 4
8128 && (regno == STACK_POINTER_REGNUM
8129 || regno >= FIRST_PSEUDO_REGISTER
8130 || x == hard_frame_pointer_rtx
8131 || x == arg_pointer_rtx)));
8132 }
8133
8134 /* Return nonzero if x is a legitimate index register. This is the case
8135 for any base register that can access a QImode object. */
8136 inline static int
8137 thumb1_index_register_rtx_p (rtx x, int strict_p)
8138 {
8139 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8140 }
8141
8142 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8143
8144 The AP may be eliminated to either the SP or the FP, so we use the
8145 least common denominator, e.g. SImode, and offsets from 0 to 64.
8146
8147 ??? Verify whether the above is the right approach.
8148
8149 ??? Also, the FP may be eliminated to the SP, so perhaps that
8150 needs special handling also.
8151
8152 ??? Look at how the mips16 port solves this problem. It probably uses
8153 better ways to solve some of these problems.
8154
8155 Although it is not incorrect, we don't accept QImode and HImode
8156 addresses based on the frame pointer or arg pointer until the
8157 reload pass starts. This is so that eliminating such addresses
8158 into stack based ones won't produce impossible code. */
8159 int
8160 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8161 {
8162 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8163 return 0;
8164
8165 /* ??? Not clear if this is right. Experiment. */
8166 if (GET_MODE_SIZE (mode) < 4
8167 && !(reload_in_progress || reload_completed)
8168 && (reg_mentioned_p (frame_pointer_rtx, x)
8169 || reg_mentioned_p (arg_pointer_rtx, x)
8170 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8171 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8172 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8173 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8174 return 0;
8175
8176 /* Accept any base register. SP only in SImode or larger. */
8177 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8178 return 1;
8179
8180 /* This is PC relative data before arm_reorg runs. */
8181 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8182 && GET_CODE (x) == SYMBOL_REF
8183 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8184 return 1;
8185
8186 /* This is PC relative data after arm_reorg runs. */
8187 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8188 && reload_completed
8189 && (GET_CODE (x) == LABEL_REF
8190 || (GET_CODE (x) == CONST
8191 && GET_CODE (XEXP (x, 0)) == PLUS
8192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8193 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8194 return 1;
8195
8196 /* Post-inc indexing only supported for SImode and larger. */
8197 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8198 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8199 return 1;
8200
8201 else if (GET_CODE (x) == PLUS)
8202 {
8203 /* REG+REG address can be any two index registers. */
8204 /* We disallow FRAME+REG addressing since we know that FRAME
8205 will be replaced with STACK, and SP relative addressing only
8206 permits SP+OFFSET. */
8207 if (GET_MODE_SIZE (mode) <= 4
8208 && XEXP (x, 0) != frame_pointer_rtx
8209 && XEXP (x, 1) != frame_pointer_rtx
8210 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8211 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8212 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8213 return 1;
8214
8215 /* REG+const has 5-7 bit offset for non-SP registers. */
8216 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8217 || XEXP (x, 0) == arg_pointer_rtx)
8218 && CONST_INT_P (XEXP (x, 1))
8219 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8220 return 1;
8221
8222 /* REG+const has 10-bit offset for SP, but only SImode and
8223 larger is supported. */
8224 /* ??? Should probably check for DI/DFmode overflow here
8225 just like GO_IF_LEGITIMATE_OFFSET does. */
8226 else if (REG_P (XEXP (x, 0))
8227 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8228 && GET_MODE_SIZE (mode) >= 4
8229 && CONST_INT_P (XEXP (x, 1))
8230 && INTVAL (XEXP (x, 1)) >= 0
8231 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8232 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8233 return 1;
8234
8235 else if (REG_P (XEXP (x, 0))
8236 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8237 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8238 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8239 && REGNO (XEXP (x, 0))
8240 <= LAST_VIRTUAL_POINTER_REGISTER))
8241 && GET_MODE_SIZE (mode) >= 4
8242 && CONST_INT_P (XEXP (x, 1))
8243 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8244 return 1;
8245 }
8246
8247 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8248 && GET_MODE_SIZE (mode) == 4
8249 && GET_CODE (x) == SYMBOL_REF
8250 && CONSTANT_POOL_ADDRESS_P (x)
8251 && ! (flag_pic
8252 && symbol_mentioned_p (get_pool_constant (x))
8253 && ! pcrel_constant_p (get_pool_constant (x))))
8254 return 1;
8255
8256 return 0;
8257 }
8258
8259 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8260 instruction of mode MODE. */
8261 int
8262 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8263 {
8264 switch (GET_MODE_SIZE (mode))
8265 {
8266 case 1:
8267 return val >= 0 && val < 32;
8268
8269 case 2:
8270 return val >= 0 && val < 64 && (val & 1) == 0;
8271
8272 default:
8273 return (val >= 0
8274 && (val + GET_MODE_SIZE (mode)) <= 128
8275 && (val & 3) == 0);
8276 }
8277 }
8278
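/* Editor's note (not part of the original file): the ranges checked above
   correspond to the Thumb-1 5-bit scaled immediate forms, e.g.

     ldrb  r0, [r1, #31]    @ byte:      0..31
     ldrh  r0, [r1, #62]    @ halfword:  0..62, even
     ldr   r0, [r1, #124]   @ word:      0..124, multiple of 4

   (for larger modes the val + size <= 128 test shrinks the upper bound).  */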
8279 bool
8280 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8281 {
8282 if (TARGET_ARM)
8283 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8284 else if (TARGET_THUMB2)
8285 return thumb2_legitimate_address_p (mode, x, strict_p);
8286 else /* if (TARGET_THUMB1) */
8287 return thumb1_legitimate_address_p (mode, x, strict_p);
8288 }
8289
8290 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8291
8292 Given an rtx X being reloaded into a reg required to be
8293 in class CLASS, return the class of reg to actually use.
8294 In general this is just CLASS, but for the Thumb core registers and
8295 immediate constants we prefer a LO_REGS class or a subset. */
8296
8297 static reg_class_t
8298 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8299 {
8300 if (TARGET_32BIT)
8301 return rclass;
8302 else
8303 {
8304 if (rclass == GENERAL_REGS)
8305 return LO_REGS;
8306 else
8307 return rclass;
8308 }
8309 }
8310
8311 /* Build the SYMBOL_REF for __tls_get_addr. */
8312
8313 static GTY(()) rtx tls_get_addr_libfunc;
8314
8315 static rtx
8316 get_tls_get_addr (void)
8317 {
8318 if (!tls_get_addr_libfunc)
8319 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8320 return tls_get_addr_libfunc;
8321 }
8322
8323 rtx
8324 arm_load_tp (rtx target)
8325 {
8326 if (!target)
8327 target = gen_reg_rtx (SImode);
8328
8329 if (TARGET_HARD_TP)
8330 {
8331 /* Can return in any reg. */
8332 emit_insn (gen_load_tp_hard (target));
8333 }
8334 else
8335 {
8336 /* Always returned in r0. Immediately copy the result into a pseudo,
8337 otherwise other uses of r0 (e.g. setting up function arguments) may
8338 clobber the value. */
8339
8340 rtx tmp;
8341
8342 emit_insn (gen_load_tp_soft ());
8343
8344 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8345 emit_move_insn (target, tmp);
8346 }
8347 return target;
8348 }
8349
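/* Editor's note (not part of the original file): with TARGET_HARD_TP the
   load_tp_hard pattern reads the CP15 user thread register (roughly
   "mrc p15, 0, <reg>, c13, c0, 3"), while the soft variant calls the
   __aeabi_read_tp helper, whose result comes back in r0 as the comment
   above notes.  */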
8350 static rtx
8351 load_tls_operand (rtx x, rtx reg)
8352 {
8353 rtx tmp;
8354
8355 if (reg == NULL_RTX)
8356 reg = gen_reg_rtx (SImode);
8357
8358 tmp = gen_rtx_CONST (SImode, x);
8359
8360 emit_move_insn (reg, tmp);
8361
8362 return reg;
8363 }
8364
8365 static rtx_insn *
8366 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8367 {
8368 rtx label, labelno, sum;
8369
8370 gcc_assert (reloc != TLS_DESCSEQ);
8371 start_sequence ();
8372
8373 labelno = GEN_INT (pic_labelno++);
8374 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8375 label = gen_rtx_CONST (VOIDmode, label);
8376
8377 sum = gen_rtx_UNSPEC (Pmode,
8378 gen_rtvec (4, x, GEN_INT (reloc), label,
8379 GEN_INT (TARGET_ARM ? 8 : 4)),
8380 UNSPEC_TLS);
8381 reg = load_tls_operand (sum, reg);
8382
8383 if (TARGET_ARM)
8384 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8385 else
8386 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8387
8388 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8389 LCT_PURE, /* LCT_CONST? */
8390 Pmode, reg, Pmode);
8391
8392 rtx_insn *insns = get_insns ();
8393 end_sequence ();
8394
8395 return insns;
8396 }
8397
8398 static rtx
8399 arm_tls_descseq_addr (rtx x, rtx reg)
8400 {
8401 rtx labelno = GEN_INT (pic_labelno++);
8402 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8403 rtx sum = gen_rtx_UNSPEC (Pmode,
8404 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8405 gen_rtx_CONST (VOIDmode, label),
8406 GEN_INT (!TARGET_ARM)),
8407 UNSPEC_TLS);
8408 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8409
8410 emit_insn (gen_tlscall (x, labelno));
8411 if (!reg)
8412 reg = gen_reg_rtx (SImode);
8413 else
8414 gcc_assert (REGNO (reg) != R0_REGNUM);
8415
8416 emit_move_insn (reg, reg0);
8417
8418 return reg;
8419 }
8420
8421 rtx
8422 legitimize_tls_address (rtx x, rtx reg)
8423 {
8424 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8425 rtx_insn *insns;
8426 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8427
8428 switch (model)
8429 {
8430 case TLS_MODEL_GLOBAL_DYNAMIC:
8431 if (TARGET_GNU2_TLS)
8432 {
8433 reg = arm_tls_descseq_addr (x, reg);
8434
8435 tp = arm_load_tp (NULL_RTX);
8436
8437 dest = gen_rtx_PLUS (Pmode, tp, reg);
8438 }
8439 else
8440 {
8441 /* Original scheme */
8442 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8443 dest = gen_reg_rtx (Pmode);
8444 emit_libcall_block (insns, dest, ret, x);
8445 }
8446 return dest;
8447
8448 case TLS_MODEL_LOCAL_DYNAMIC:
8449 if (TARGET_GNU2_TLS)
8450 {
8451 reg = arm_tls_descseq_addr (x, reg);
8452
8453 tp = arm_load_tp (NULL_RTX);
8454
8455 dest = gen_rtx_PLUS (Pmode, tp, reg);
8456 }
8457 else
8458 {
8459 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8460
8461 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8462 share the LDM result with other LD model accesses. */
8463 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8464 UNSPEC_TLS);
8465 dest = gen_reg_rtx (Pmode);
8466 emit_libcall_block (insns, dest, ret, eqv);
8467
8468 /* Load the addend. */
8469 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8470 GEN_INT (TLS_LDO32)),
8471 UNSPEC_TLS);
8472 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8473 dest = gen_rtx_PLUS (Pmode, dest, addend);
8474 }
8475 return dest;
8476
8477 case TLS_MODEL_INITIAL_EXEC:
8478 labelno = GEN_INT (pic_labelno++);
8479 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8480 label = gen_rtx_CONST (VOIDmode, label);
8481 sum = gen_rtx_UNSPEC (Pmode,
8482 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8483 GEN_INT (TARGET_ARM ? 8 : 4)),
8484 UNSPEC_TLS);
8485 reg = load_tls_operand (sum, reg);
8486
8487 if (TARGET_ARM)
8488 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8489 else if (TARGET_THUMB2)
8490 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8491 else
8492 {
8493 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8494 emit_move_insn (reg, gen_const_mem (SImode, reg));
8495 }
8496
8497 tp = arm_load_tp (NULL_RTX);
8498
8499 return gen_rtx_PLUS (Pmode, tp, reg);
8500
8501 case TLS_MODEL_LOCAL_EXEC:
8502 tp = arm_load_tp (NULL_RTX);
8503
8504 reg = gen_rtx_UNSPEC (Pmode,
8505 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8506 UNSPEC_TLS);
8507 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8508
8509 return gen_rtx_PLUS (Pmode, tp, reg);
8510
8511 default:
8512 abort ();
8513 }
8514 }
8515
8516 /* Try machine-dependent ways of modifying an illegitimate address
8517 to be legitimate. If we find one, return the new, valid address. */
8518 rtx
8519 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8520 {
8521 if (arm_tls_referenced_p (x))
8522 {
8523 rtx addend = NULL;
8524
8525 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8526 {
8527 addend = XEXP (XEXP (x, 0), 1);
8528 x = XEXP (XEXP (x, 0), 0);
8529 }
8530
8531 if (GET_CODE (x) != SYMBOL_REF)
8532 return x;
8533
8534 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8535
8536 x = legitimize_tls_address (x, NULL_RTX);
8537
8538 if (addend)
8539 {
8540 x = gen_rtx_PLUS (SImode, x, addend);
8541 orig_x = x;
8542 }
8543 else
8544 return x;
8545 }
8546
8547 if (!TARGET_ARM)
8548 {
8549 /* TODO: legitimize_address for Thumb2. */
8550 if (TARGET_THUMB2)
8551 return x;
8552 return thumb_legitimize_address (x, orig_x, mode);
8553 }
8554
8555 if (GET_CODE (x) == PLUS)
8556 {
8557 rtx xop0 = XEXP (x, 0);
8558 rtx xop1 = XEXP (x, 1);
8559
8560 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8561 xop0 = force_reg (SImode, xop0);
8562
8563 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8564 && !symbol_mentioned_p (xop1))
8565 xop1 = force_reg (SImode, xop1);
8566
8567 if (ARM_BASE_REGISTER_RTX_P (xop0)
8568 && CONST_INT_P (xop1))
8569 {
8570 HOST_WIDE_INT n, low_n;
8571 rtx base_reg, val;
8572 n = INTVAL (xop1);
8573
8574 /* VFP addressing modes actually allow greater offsets, but for
8575 now we just stick with the lowest common denominator. */
8576 if (mode == DImode || mode == DFmode)
8577 {
8578 low_n = n & 0x0f;
8579 n &= ~0x0f;
8580 if (low_n > 4)
8581 {
8582 n += 16;
8583 low_n -= 16;
8584 }
8585 }
8586 else
8587 {
8588 low_n = ((mode) == TImode ? 0
8589 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8590 n -= low_n;
8591 }
8592
8593 base_reg = gen_reg_rtx (SImode);
8594 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8595 emit_move_insn (base_reg, val);
8596 x = plus_constant (Pmode, base_reg, low_n);
8597 }
8598 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8599 x = gen_rtx_PLUS (SImode, xop0, xop1);
8600 }
8601
8602 /* XXX We don't allow MINUS any more -- see comment in
8603 arm_legitimate_address_outer_p (). */
8604 else if (GET_CODE (x) == MINUS)
8605 {
8606 rtx xop0 = XEXP (x, 0);
8607 rtx xop1 = XEXP (x, 1);
8608
8609 if (CONSTANT_P (xop0))
8610 xop0 = force_reg (SImode, xop0);
8611
8612 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8613 xop1 = force_reg (SImode, xop1);
8614
8615 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8616 x = gen_rtx_MINUS (SImode, xop0, xop1);
8617 }
8618
8619 /* Make sure to take full advantage of the pre-indexed addressing mode
8620 with absolute addresses, which often allows the base register to be
8621 factorized across multiple adjacent memory references, and might even
8622 allow the minipool to be avoided entirely. */
8623 else if (CONST_INT_P (x) && optimize > 0)
8624 {
8625 unsigned int bits;
8626 HOST_WIDE_INT mask, base, index;
8627 rtx base_reg;
8628
8629 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8630 use an 8-bit index. So let's use a 12-bit index for SImode only and
8631 hope that arm_gen_constant will enable ldrb to use more bits. */
8632 bits = (mode == SImode) ? 12 : 8;
8633 mask = (1 << bits) - 1;
8634 base = INTVAL (x) & ~mask;
8635 index = INTVAL (x) & mask;
8636 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8637 {
8638 /* It'll most probably be more efficient to generate the base
8639 with more bits set and use a negative index instead. */
8640 base |= mask;
8641 index -= mask;
8642 }
8643 base_reg = force_reg (SImode, GEN_INT (base));
8644 x = plus_constant (Pmode, base_reg, index);
8645 }
8646
8647 if (flag_pic)
8648 {
8649 /* We need to find and carefully transform any SYMBOL and LABEL
8650 references; so go back to the original address expression. */
8651 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8652
8653 if (new_x != orig_x)
8654 x = new_x;
8655 }
8656
8657 return x;
8658 }
8659
8660
8661 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8662 to be legitimate. If we find one, return the new, valid address. */
8663 rtx
8664 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8665 {
8666 if (GET_CODE (x) == PLUS
8667 && CONST_INT_P (XEXP (x, 1))
8668 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8669 || INTVAL (XEXP (x, 1)) < 0))
8670 {
8671 rtx xop0 = XEXP (x, 0);
8672 rtx xop1 = XEXP (x, 1);
8673 HOST_WIDE_INT offset = INTVAL (xop1);
8674
8675 /* Try to fold the offset into a biasing of the base register and
8676 then offsetting that. Don't do this when optimizing for space
8677 since it can cause too many CSEs. */
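/* Illustrative example (editorial sketch, not from the original source;
   values arbitrary): when optimizing for size, with SImode and
   offset == 300, the guard 256 <= 300 < 256 + 31 * 4 holds, so
   delta == 300 - (256 - 4) == 48; the base is biased by 300 - 48 == 252
   and the access itself then uses offset 48, which the Thumb-1 word
   load can encode directly.  */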
8678 if (optimize_size && offset >= 0
8679 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8680 {
8681 HOST_WIDE_INT delta;
8682
8683 if (offset >= 256)
8684 delta = offset - (256 - GET_MODE_SIZE (mode));
8685 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8686 delta = 31 * GET_MODE_SIZE (mode);
8687 else
8688 delta = offset & (~31 * GET_MODE_SIZE (mode));
8689
8690 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8691 NULL_RTX);
8692 x = plus_constant (Pmode, xop0, delta);
8693 }
8694 else if (offset < 0 && offset > -256)
8695 /* Small negative offsets are best done with a subtract before the
8696 dereference; forcing these into a register normally takes two
8697 instructions. */
8698 x = force_operand (x, NULL_RTX);
8699 else
8700 {
8701 /* For the remaining cases, force the constant into a register. */
8702 xop1 = force_reg (SImode, xop1);
8703 x = gen_rtx_PLUS (SImode, xop0, xop1);
8704 }
8705 }
8706 else if (GET_CODE (x) == PLUS
8707 && s_register_operand (XEXP (x, 1), SImode)
8708 && !s_register_operand (XEXP (x, 0), SImode))
8709 {
8710 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8711
8712 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8713 }
8714
8715 if (flag_pic)
8716 {
8717 /* We need to find and carefully transform any SYMBOL and LABEL
8718 references; so go back to the original address expression. */
8719 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8720
8721 if (new_x != orig_x)
8722 x = new_x;
8723 }
8724
8725 return x;
8726 }
8727
8728 /* Return TRUE if X contains any TLS symbol references. */
8729
8730 bool
8731 arm_tls_referenced_p (rtx x)
8732 {
8733 if (! TARGET_HAVE_TLS)
8734 return false;
8735
8736 subrtx_iterator::array_type array;
8737 FOR_EACH_SUBRTX (iter, array, x, ALL)
8738 {
8739 const_rtx x = *iter;
8740 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8741 {
8742 /* ARM currently does not provide relocations to encode TLS variables
8743 into AArch32 instructions, only into data, so there is currently no
8744 way to implement these if a literal pool is disabled. */
8745 if (arm_disable_literal_pool)
8746 sorry ("accessing thread-local storage is not currently supported "
8747 "with -mpure-code or -mslow-flash-data");
8748
8749 return true;
8750 }
8751
8752 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8753 TLS offsets, not real symbol references. */
8754 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8755 iter.skip_subrtxes ();
8756 }
8757 return false;
8758 }
8759
8760 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8761
8762 On the ARM, allow any integer (invalid ones are removed later by insn
8763 patterns), nice doubles and symbol_refs which refer to the function's
8764 constant pool XXX.
8765
8766 When generating PIC, allow anything. */
8767
8768 static bool
8769 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8770 {
8771 return flag_pic || !label_mentioned_p (x);
8772 }
8773
8774 static bool
8775 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8776 {
8777 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8778 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8779 for ARMv8-M Baseline or later, the result is valid. */
8780 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8781 x = XEXP (x, 0);
8782
8783 return (CONST_INT_P (x)
8784 || CONST_DOUBLE_P (x)
8785 || CONSTANT_ADDRESS_P (x)
8786 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8787 || flag_pic);
8788 }
8789
8790 static bool
8791 arm_legitimate_constant_p (machine_mode mode, rtx x)
8792 {
8793 return (!arm_cannot_force_const_mem (mode, x)
8794 && (TARGET_32BIT
8795 ? arm_legitimate_constant_p_1 (mode, x)
8796 : thumb_legitimate_constant_p (mode, x)));
8797 }
8798
8799 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8800
8801 static bool
8802 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8803 {
8804 rtx base, offset;
8805
8806 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8807 {
8808 split_const (x, &base, &offset);
8809 if (GET_CODE (base) == SYMBOL_REF
8810 && !offset_within_block_p (base, INTVAL (offset)))
8811 return true;
8812 }
8813 return arm_tls_referenced_p (x);
8814 }
8815 \f
8816 #define REG_OR_SUBREG_REG(X) \
8817 (REG_P (X) \
8818 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8819
8820 #define REG_OR_SUBREG_RTX(X) \
8821 (REG_P (X) ? (X) : SUBREG_REG (X))
8822
8823 static inline int
8824 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8825 {
8826 machine_mode mode = GET_MODE (x);
8827 int total, words;
8828
8829 switch (code)
8830 {
8831 case ASHIFT:
8832 case ASHIFTRT:
8833 case LSHIFTRT:
8834 case ROTATERT:
8835 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8836
8837 case PLUS:
8838 case MINUS:
8839 case COMPARE:
8840 case NEG:
8841 case NOT:
8842 return COSTS_N_INSNS (1);
8843
8844 case MULT:
8845 if (arm_arch6m && arm_m_profile_small_mul)
8846 return COSTS_N_INSNS (32);
8847
8848 if (CONST_INT_P (XEXP (x, 1)))
8849 {
8850 int cycles = 0;
8851 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8852
8853 while (i)
8854 {
8855 i >>= 2;
8856 cycles++;
8857 }
8858 return COSTS_N_INSNS (2) + cycles;
8859 }
8860 return COSTS_N_INSNS (1) + 16;
8861
8862 case SET:
8863 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8864 the mode. */
8865 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8866 return (COSTS_N_INSNS (words)
8867 + 4 * ((MEM_P (SET_SRC (x)))
8868 + MEM_P (SET_DEST (x))));
8869
8870 case CONST_INT:
8871 if (outer == SET)
8872 {
8873 if (UINTVAL (x) < 256
8874 /* 16-bit constant. */
8875 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8876 return 0;
8877 if (thumb_shiftable_const (INTVAL (x)))
8878 return COSTS_N_INSNS (2);
8879 return COSTS_N_INSNS (3);
8880 }
8881 else if ((outer == PLUS || outer == COMPARE)
8882 && INTVAL (x) < 256 && INTVAL (x) > -256)
8883 return 0;
8884 else if ((outer == IOR || outer == XOR || outer == AND)
8885 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8886 return COSTS_N_INSNS (1);
8887 else if (outer == AND)
8888 {
8889 int i;
8890 /* This duplicates the tests in the andsi3 expander. */
8891 for (i = 9; i <= 31; i++)
8892 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8893 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8894 return COSTS_N_INSNS (2);
8895 }
8896 else if (outer == ASHIFT || outer == ASHIFTRT
8897 || outer == LSHIFTRT)
8898 return 0;
8899 return COSTS_N_INSNS (2);
8900
8901 case CONST:
8902 case CONST_DOUBLE:
8903 case LABEL_REF:
8904 case SYMBOL_REF:
8905 return COSTS_N_INSNS (3);
8906
8907 case UDIV:
8908 case UMOD:
8909 case DIV:
8910 case MOD:
8911 return 100;
8912
8913 case TRUNCATE:
8914 return 99;
8915
8916 case AND:
8917 case XOR:
8918 case IOR:
8919 /* XXX guess. */
8920 return 8;
8921
8922 case MEM:
8923 /* XXX another guess. */
8924 /* Memory costs quite a lot for the first word, but subsequent words
8925 load at the equivalent of a single insn each. */
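/* Illustrative example (editorial sketch, not from the original source;
   assumes UNITS_PER_WORD == 4): a DImode load costs
   10 + 4 * ((8 - 1) / 4) == 14, plus a further 4 if the address is a
   constant-pool SYMBOL_REF.  */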
8926 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8927 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8928 ? 4 : 0));
8929
8930 case IF_THEN_ELSE:
8931 /* XXX a guess. */
8932 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8933 return 14;
8934 return 2;
8935
8936 case SIGN_EXTEND:
8937 case ZERO_EXTEND:
8938 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8939 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8940
8941 if (mode == SImode)
8942 return total;
8943
8944 if (arm_arch6)
8945 return total + COSTS_N_INSNS (1);
8946
8947 /* Assume a two-shift sequence. Increase the cost slightly so
8948 we prefer actual shifts over an extend operation. */
8949 return total + 1 + COSTS_N_INSNS (2);
8950
8951 default:
8952 return 99;
8953 }
8954 }
8955
8956 /* Estimate the size cost of Thumb-1 instructions.
8957 For now most of the code is copied from thumb1_rtx_costs. We need more
8958 fine-grained tuning when we have more related test cases. */
8959 static inline int
8960 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8961 {
8962 machine_mode mode = GET_MODE (x);
8963 int words, cost;
8964
8965 switch (code)
8966 {
8967 case ASHIFT:
8968 case ASHIFTRT:
8969 case LSHIFTRT:
8970 case ROTATERT:
8971 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8972
8973 case PLUS:
8974 case MINUS:
8975 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8976 shiftsub1 patterns defined by RTL expansion, especially for the
8977 expansion of multiplication. */
8978 if ((GET_CODE (XEXP (x, 0)) == MULT
8979 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8980 || (GET_CODE (XEXP (x, 1)) == MULT
8981 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8982 return COSTS_N_INSNS (2);
8983 /* Fall through. */
8984 case COMPARE:
8985 case NEG:
8986 case NOT:
8987 return COSTS_N_INSNS (1);
8988
8989 case MULT:
8990 if (CONST_INT_P (XEXP (x, 1)))
8991 {
8992 /* The Thumb-1 mul instruction can't operate on a constant; we must load
8993 it into a register first. */
8994 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8995 /* For targets that have a very small and high-latency multiply
8996 unit, we prefer to synthesize the mult with up to 5 instructions,
8997 giving a good balance between size and performance. */
8998 if (arm_arch6m && arm_m_profile_small_mul)
8999 return COSTS_N_INSNS (5);
9000 else
9001 return COSTS_N_INSNS (1) + const_size;
9002 }
9003 return COSTS_N_INSNS (1);
9004
9005 case SET:
9006 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9007 the mode. */
9008 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9009 cost = COSTS_N_INSNS (words);
9010 if (satisfies_constraint_J (SET_SRC (x))
9011 || satisfies_constraint_K (SET_SRC (x))
9012 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9013 || (CONST_INT_P (SET_SRC (x))
9014 && UINTVAL (SET_SRC (x)) >= 256
9015 && TARGET_HAVE_MOVT
9016 && satisfies_constraint_j (SET_SRC (x)))
9017 /* thumb1_movdi_insn. */
9018 || ((words > 1) && MEM_P (SET_SRC (x))))
9019 cost += COSTS_N_INSNS (1);
9020 return cost;
9021
9022 case CONST_INT:
9023 if (outer == SET)
9024 {
9025 if (UINTVAL (x) < 256)
9026 return COSTS_N_INSNS (1);
9027 /* movw is 4 bytes long. */
9028 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9029 return COSTS_N_INSNS (2);
9030 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9031 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9034 if (thumb_shiftable_const (INTVAL (x)))
9035 return COSTS_N_INSNS (2);
9036 return COSTS_N_INSNS (3);
9037 }
9038 else if ((outer == PLUS || outer == COMPARE)
9039 && INTVAL (x) < 256 && INTVAL (x) > -256)
9040 return 0;
9041 else if ((outer == IOR || outer == XOR || outer == AND)
9042 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9043 return COSTS_N_INSNS (1);
9044 else if (outer == AND)
9045 {
9046 int i;
9047 /* This duplicates the tests in the andsi3 expander. */
9048 for (i = 9; i <= 31; i++)
9049 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9050 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9051 return COSTS_N_INSNS (2);
9052 }
9053 else if (outer == ASHIFT || outer == ASHIFTRT
9054 || outer == LSHIFTRT)
9055 return 0;
9056 return COSTS_N_INSNS (2);
9057
9058 case CONST:
9059 case CONST_DOUBLE:
9060 case LABEL_REF:
9061 case SYMBOL_REF:
9062 return COSTS_N_INSNS (3);
9063
9064 case UDIV:
9065 case UMOD:
9066 case DIV:
9067 case MOD:
9068 return 100;
9069
9070 case TRUNCATE:
9071 return 99;
9072
9073 case AND:
9074 case XOR:
9075 case IOR:
9076 return COSTS_N_INSNS (1);
9077
9078 case MEM:
9079 return (COSTS_N_INSNS (1)
9080 + COSTS_N_INSNS (1)
9081 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9082 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9083 ? COSTS_N_INSNS (1) : 0));
9084
9085 case IF_THEN_ELSE:
9086 /* XXX a guess. */
9087 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9088 return 14;
9089 return 2;
9090
9091 case ZERO_EXTEND:
9092 /* XXX still guessing. */
9093 switch (GET_MODE (XEXP (x, 0)))
9094 {
9095 case E_QImode:
9096 return (1 + (mode == DImode ? 4 : 0)
9097 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9098
9099 case E_HImode:
9100 return (4 + (mode == DImode ? 4 : 0)
9101 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9102
9103 case E_SImode:
9104 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9105
9106 default:
9107 return 99;
9108 }
9109
9110 default:
9111 return 99;
9112 }
9113 }
9114
9115 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9116 operand, then return the operand that is being shifted. If the shift
9117 is not by a constant, then set *SHIFT_REG to point to the shift-amount
9118 operand. Return NULL if OP is not a shifter operand. */
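/* Illustrative examples (editorial sketch, not from the original source):
     (mult (reg r0) (const_int 8))    -> returns r0 (power-of-two multiply,
                                         i.e. a left shift by 3)
     (ashift (reg r0) (const_int 3))  -> returns r0, *SHIFT_REG untouched
     (ashift (reg r0) (reg r1))       -> returns r0, *SHIFT_REG set to r1
     (plus (reg r0) (reg r1))         -> returns NULL.  */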
9119 static rtx
9120 shifter_op_p (rtx op, rtx *shift_reg)
9121 {
9122 enum rtx_code code = GET_CODE (op);
9123
9124 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9125 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9126 return XEXP (op, 0);
9127 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9128 return XEXP (op, 0);
9129 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9130 || code == ASHIFTRT)
9131 {
9132 if (!CONST_INT_P (XEXP (op, 1)))
9133 *shift_reg = XEXP (op, 1);
9134 return XEXP (op, 0);
9135 }
9136
9137 return NULL;
9138 }
9139
9140 static bool
9141 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9142 {
9143 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9144 rtx_code code = GET_CODE (x);
9145 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9146
9147 switch (XINT (x, 1))
9148 {
9149 case UNSPEC_UNALIGNED_LOAD:
9150 /* We can only do unaligned loads into the integer unit, and we can't
9151 use LDM or LDRD. */
9152 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9153 if (speed_p)
9154 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9155 + extra_cost->ldst.load_unaligned);
9156
9157 #ifdef NOT_YET
9158 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9159 ADDR_SPACE_GENERIC, speed_p);
9160 #endif
9161 return true;
9162
9163 case UNSPEC_UNALIGNED_STORE:
9164 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9165 if (speed_p)
9166 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9167 + extra_cost->ldst.store_unaligned);
9168
9169 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9170 #ifdef NOT_YET
9171 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9172 ADDR_SPACE_GENERIC, speed_p);
9173 #endif
9174 return true;
9175
9176 case UNSPEC_VRINTZ:
9177 case UNSPEC_VRINTP:
9178 case UNSPEC_VRINTM:
9179 case UNSPEC_VRINTR:
9180 case UNSPEC_VRINTX:
9181 case UNSPEC_VRINTA:
9182 if (speed_p)
9183 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9184
9185 return true;
9186 default:
9187 *cost = COSTS_N_INSNS (2);
9188 break;
9189 }
9190 return true;
9191 }
9192
9193 /* Cost of a libcall. We assume one insn per argument, an amount for the
9194 call itself (one insn for -Os), and then one for processing the result. */
9195 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
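/* Illustrative example (editorial sketch, not from the original source):
   LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when optimizing for
   speed and to COSTS_N_INSNS (4) when optimizing for size.  */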
9196
9197 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9198 do \
9199 { \
9200 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9201 if (shift_op != NULL \
9202 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9203 { \
9204 if (shift_reg) \
9205 { \
9206 if (speed_p) \
9207 *cost += extra_cost->alu.arith_shift_reg; \
9208 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9209 ASHIFT, 1, speed_p); \
9210 } \
9211 else if (speed_p) \
9212 *cost += extra_cost->alu.arith_shift; \
9213 \
9214 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9215 ASHIFT, 0, speed_p) \
9216 + rtx_cost (XEXP (x, 1 - IDX), \
9217 GET_MODE (shift_op), \
9218 OP, 1, speed_p)); \
9219 return true; \
9220 } \
9221 } \
9222 while (0);
9223
9224 /* RTX costs. Make an estimate of the cost of executing the operation
9225 X, which is contained within an operation with code OUTER_CODE.
9226 SPEED_P indicates whether the cost desired is the performance cost,
9227 or the size cost. The estimate is stored in COST and the return
9228 value is TRUE if the cost calculation is final, or FALSE if the
9229 caller should recurse through the operands of X to add additional
9230 costs.
9231
9232 We currently make no attempt to model the size savings of Thumb-2
9233 16-bit instructions. At the normal points in compilation where
9234 this code is called we have no measure of whether the condition
9235 flags are live or not, and thus no realistic way to determine what
9236 the size will eventually be. */
9237 static bool
9238 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9239 const struct cpu_cost_table *extra_cost,
9240 int *cost, bool speed_p)
9241 {
9242 machine_mode mode = GET_MODE (x);
9243
9244 *cost = COSTS_N_INSNS (1);
9245
9246 if (TARGET_THUMB1)
9247 {
9248 if (speed_p)
9249 *cost = thumb1_rtx_costs (x, code, outer_code);
9250 else
9251 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9252 return true;
9253 }
9254
9255 switch (code)
9256 {
9257 case SET:
9258 *cost = 0;
9259 /* SET RTXs don't have a mode so we get it from the destination. */
9260 mode = GET_MODE (SET_DEST (x));
9261
9262 if (REG_P (SET_SRC (x))
9263 && REG_P (SET_DEST (x)))
9264 {
9265 /* Assume that most copies can be done with a single insn,
9266 unless we don't have HW FP, in which case everything
9267 larger than word mode will require two insns. */
9268 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9269 && GET_MODE_SIZE (mode) > 4)
9270 || mode == DImode)
9271 ? 2 : 1);
9272 /* Conditional register moves can be encoded
9273 in 16 bits in Thumb mode. */
9274 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9275 *cost >>= 1;
9276
9277 return true;
9278 }
9279
9280 if (CONST_INT_P (SET_SRC (x)))
9281 {
9282 /* Handle CONST_INT here, since the value doesn't have a mode
9283 and we would otherwise be unable to work out the true cost. */
9284 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9285 0, speed_p);
9286 outer_code = SET;
9287 /* Slightly lower the cost of setting a core reg to a constant.
9288 This helps break up chains and allows for better scheduling. */
9289 if (REG_P (SET_DEST (x))
9290 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9291 *cost -= 1;
9292 x = SET_SRC (x);
9293 /* Immediate moves with an immediate in the range [0, 255] can be
9294 encoded in 16 bits in Thumb mode. */
9295 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9296 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9297 *cost >>= 1;
9298 goto const_int_cost;
9299 }
9300
9301 return false;
9302
9303 case MEM:
9304 /* A memory access costs 1 insn if the mode is small or the address is
9305 a single register; otherwise it costs one insn per word. */
9306 if (REG_P (XEXP (x, 0)))
9307 *cost = COSTS_N_INSNS (1);
9308 else if (flag_pic
9309 && GET_CODE (XEXP (x, 0)) == PLUS
9310 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9311 /* This will be split into two instructions.
9312 See arm.md:calculate_pic_address. */
9313 *cost = COSTS_N_INSNS (2);
9314 else
9315 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9316
9317 /* For speed optimizations, add the costs of the address and
9318 accessing memory. */
9319 if (speed_p)
9320 #ifdef NOT_YET
9321 *cost += (extra_cost->ldst.load
9322 + arm_address_cost (XEXP (x, 0), mode,
9323 ADDR_SPACE_GENERIC, speed_p));
9324 #else
9325 *cost += extra_cost->ldst.load;
9326 #endif
9327 return true;
9328
9329 case PARALLEL:
9330 {
9331 /* Calculations of LDM costs are complex. We assume an initial cost
9332 (ldm_1st) which will load the number of registers mentioned in
9333 ldm_regs_per_insn_1st registers; then each additional
9334 ldm_regs_per_insn_subsequent registers cost one more insn. The
9335 formula for N regs is thus:
9336
9337 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9338 + ldm_regs_per_insn_subsequent - 1)
9339 / ldm_regs_per_insn_subsequent).
9340
9341 Additional costs may also be added for addressing. A similar
9342 formula is used for STM. */
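/* Illustrative example with made-up tuning values (editorial sketch, not
   from the original source): for N == 6, ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, the formula gives
   ldm_1st + COSTS_N_INSNS ((4 + 2 - 1) / 2) == ldm_1st + COSTS_N_INSNS (2).  */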
9343
9344 bool is_ldm = load_multiple_operation (x, SImode);
9345 bool is_stm = store_multiple_operation (x, SImode);
9346
9347 if (is_ldm || is_stm)
9348 {
9349 if (speed_p)
9350 {
9351 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9352 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9353 ? extra_cost->ldst.ldm_regs_per_insn_1st
9354 : extra_cost->ldst.stm_regs_per_insn_1st;
9355 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9356 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9357 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9358
9359 *cost += regs_per_insn_1st
9360 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9361 + regs_per_insn_sub - 1)
9362 / regs_per_insn_sub);
9363 return true;
9364 }
9365
9366 }
9367 return false;
9368 }
9369 case DIV:
9370 case UDIV:
9371 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9372 && (mode == SFmode || !TARGET_VFP_SINGLE))
9373 *cost += COSTS_N_INSNS (speed_p
9374 ? extra_cost->fp[mode != SFmode].div : 0);
9375 else if (mode == SImode && TARGET_IDIV)
9376 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9377 else
9378 *cost = LIBCALL_COST (2);
9379
9380 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9381 are possible, udiv is preferred. */
9382 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9383 return false; /* All arguments must be in registers. */
9384
9385 case MOD:
9386 /* MOD by a power of 2 can be expanded as:
9387 rsbs r1, r0, #0
9388 and r0, r0, #(n - 1)
9389 and r1, r1, #(n - 1)
9390 rsbpl r0, r1, #0. */
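/* Illustrative example (editorial sketch, not from the original source):
   for r0 % 8 the masks above are #(8 - 1) == #7, and the case below
   charges COSTS_N_INSNS (3) on top of the initial single-insn estimate,
   matching the four-instruction sequence.  */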
9391 if (CONST_INT_P (XEXP (x, 1))
9392 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9393 && mode == SImode)
9394 {
9395 *cost += COSTS_N_INSNS (3);
9396
9397 if (speed_p)
9398 *cost += 2 * extra_cost->alu.logical
9399 + extra_cost->alu.arith;
9400 return true;
9401 }
9402
9403 /* Fall-through. */
9404 case UMOD:
9405 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9406 are possible, udiv is preferred. */
9407 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9408 return false; /* All arguments must be in registers. */
9409
9410 case ROTATE:
9411 if (mode == SImode && REG_P (XEXP (x, 1)))
9412 {
9413 *cost += (COSTS_N_INSNS (1)
9414 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9415 if (speed_p)
9416 *cost += extra_cost->alu.shift_reg;
9417 return true;
9418 }
9419 /* Fall through */
9420 case ROTATERT:
9421 case ASHIFT:
9422 case LSHIFTRT:
9423 case ASHIFTRT:
9424 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9425 {
9426 *cost += (COSTS_N_INSNS (2)
9427 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9428 if (speed_p)
9429 *cost += 2 * extra_cost->alu.shift;
9430 return true;
9431 }
9432 else if (mode == SImode)
9433 {
9434 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9435 /* Slightly disparage register shifts at -Os, but not by much. */
9436 if (!CONST_INT_P (XEXP (x, 1)))
9437 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9438 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9439 return true;
9440 }
9441 else if (GET_MODE_CLASS (mode) == MODE_INT
9442 && GET_MODE_SIZE (mode) < 4)
9443 {
9444 if (code == ASHIFT)
9445 {
9446 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9447 /* Slightly disparage register shifts at -Os, but not by
9448 much. */
9449 if (!CONST_INT_P (XEXP (x, 1)))
9450 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9451 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9452 }
9453 else if (code == LSHIFTRT || code == ASHIFTRT)
9454 {
9455 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9456 {
9457 /* Can use SBFX/UBFX. */
9458 if (speed_p)
9459 *cost += extra_cost->alu.bfx;
9460 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9461 }
9462 else
9463 {
9464 *cost += COSTS_N_INSNS (1);
9465 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9466 if (speed_p)
9467 {
9468 if (CONST_INT_P (XEXP (x, 1)))
9469 *cost += 2 * extra_cost->alu.shift;
9470 else
9471 *cost += (extra_cost->alu.shift
9472 + extra_cost->alu.shift_reg);
9473 }
9474 else
9475 /* Slightly disparage register shifts. */
9476 *cost += !CONST_INT_P (XEXP (x, 1));
9477 }
9478 }
9479 else /* Rotates. */
9480 {
9481 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9482 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9483 if (speed_p)
9484 {
9485 if (CONST_INT_P (XEXP (x, 1)))
9486 *cost += (2 * extra_cost->alu.shift
9487 + extra_cost->alu.log_shift);
9488 else
9489 *cost += (extra_cost->alu.shift
9490 + extra_cost->alu.shift_reg
9491 + extra_cost->alu.log_shift_reg);
9492 }
9493 }
9494 return true;
9495 }
9496
9497 *cost = LIBCALL_COST (2);
9498 return false;
9499
9500 case BSWAP:
9501 if (arm_arch6)
9502 {
9503 if (mode == SImode)
9504 {
9505 if (speed_p)
9506 *cost += extra_cost->alu.rev;
9507
9508 return false;
9509 }
9510 }
9511 else
9512 {
9513 /* No rev instruction available. Look at arm_legacy_rev
9514 and thumb_legacy_rev for the form of RTL used then. */
9515 if (TARGET_THUMB)
9516 {
9517 *cost += COSTS_N_INSNS (9);
9518
9519 if (speed_p)
9520 {
9521 *cost += 6 * extra_cost->alu.shift;
9522 *cost += 3 * extra_cost->alu.logical;
9523 }
9524 }
9525 else
9526 {
9527 *cost += COSTS_N_INSNS (4);
9528
9529 if (speed_p)
9530 {
9531 *cost += 2 * extra_cost->alu.shift;
9532 *cost += extra_cost->alu.arith_shift;
9533 *cost += 2 * extra_cost->alu.logical;
9534 }
9535 }
9536 return true;
9537 }
9538 return false;
9539
9540 case MINUS:
9541 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9542 && (mode == SFmode || !TARGET_VFP_SINGLE))
9543 {
9544 if (GET_CODE (XEXP (x, 0)) == MULT
9545 || GET_CODE (XEXP (x, 1)) == MULT)
9546 {
9547 rtx mul_op0, mul_op1, sub_op;
9548
9549 if (speed_p)
9550 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9551
9552 if (GET_CODE (XEXP (x, 0)) == MULT)
9553 {
9554 mul_op0 = XEXP (XEXP (x, 0), 0);
9555 mul_op1 = XEXP (XEXP (x, 0), 1);
9556 sub_op = XEXP (x, 1);
9557 }
9558 else
9559 {
9560 mul_op0 = XEXP (XEXP (x, 1), 0);
9561 mul_op1 = XEXP (XEXP (x, 1), 1);
9562 sub_op = XEXP (x, 0);
9563 }
9564
9565 /* The first operand of the multiply may be optionally
9566 negated. */
9567 if (GET_CODE (mul_op0) == NEG)
9568 mul_op0 = XEXP (mul_op0, 0);
9569
9570 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9571 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9572 + rtx_cost (sub_op, mode, code, 0, speed_p));
9573
9574 return true;
9575 }
9576
9577 if (speed_p)
9578 *cost += extra_cost->fp[mode != SFmode].addsub;
9579 return false;
9580 }
9581
9582 if (mode == SImode)
9583 {
9584 rtx shift_by_reg = NULL;
9585 rtx shift_op;
9586 rtx non_shift_op;
9587
9588 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9589 if (shift_op == NULL)
9590 {
9591 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9592 non_shift_op = XEXP (x, 0);
9593 }
9594 else
9595 non_shift_op = XEXP (x, 1);
9596
9597 if (shift_op != NULL)
9598 {
9599 if (shift_by_reg != NULL)
9600 {
9601 if (speed_p)
9602 *cost += extra_cost->alu.arith_shift_reg;
9603 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9604 }
9605 else if (speed_p)
9606 *cost += extra_cost->alu.arith_shift;
9607
9608 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9609 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9610 return true;
9611 }
9612
9613 if (arm_arch_thumb2
9614 && GET_CODE (XEXP (x, 1)) == MULT)
9615 {
9616 /* MLS. */
9617 if (speed_p)
9618 *cost += extra_cost->mult[0].add;
9619 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9620 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9621 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9622 return true;
9623 }
9624
9625 if (CONST_INT_P (XEXP (x, 0)))
9626 {
9627 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9628 INTVAL (XEXP (x, 0)), NULL_RTX,
9629 NULL_RTX, 1, 0);
9630 *cost = COSTS_N_INSNS (insns);
9631 if (speed_p)
9632 *cost += insns * extra_cost->alu.arith;
9633 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9634 return true;
9635 }
9636 else if (speed_p)
9637 *cost += extra_cost->alu.arith;
9638
9639 return false;
9640 }
9641
9642 if (GET_MODE_CLASS (mode) == MODE_INT
9643 && GET_MODE_SIZE (mode) < 4)
9644 {
9645 rtx shift_op, shift_reg;
9646 shift_reg = NULL;
9647
9648 /* We check both sides of the MINUS for shifter operands since,
9649 unlike PLUS, it's not commutative. */
9650
9651 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9652 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9653
9654 /* Slightly disparage, as we might need to widen the result. */
9655 *cost += 1;
9656 if (speed_p)
9657 *cost += extra_cost->alu.arith;
9658
9659 if (CONST_INT_P (XEXP (x, 0)))
9660 {
9661 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9662 return true;
9663 }
9664
9665 return false;
9666 }
9667
9668 if (mode == DImode)
9669 {
9670 *cost += COSTS_N_INSNS (1);
9671
9672 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9673 {
9674 rtx op1 = XEXP (x, 1);
9675
9676 if (speed_p)
9677 *cost += 2 * extra_cost->alu.arith;
9678
9679 if (GET_CODE (op1) == ZERO_EXTEND)
9680 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9681 0, speed_p);
9682 else
9683 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9684 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9685 0, speed_p);
9686 return true;
9687 }
9688 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9689 {
9690 if (speed_p)
9691 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9692 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9693 0, speed_p)
9694 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9695 return true;
9696 }
9697 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9698 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9699 {
9700 if (speed_p)
9701 *cost += (extra_cost->alu.arith
9702 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9703 ? extra_cost->alu.arith
9704 : extra_cost->alu.arith_shift));
9705 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9706 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9707 GET_CODE (XEXP (x, 1)), 0, speed_p));
9708 return true;
9709 }
9710
9711 if (speed_p)
9712 *cost += 2 * extra_cost->alu.arith;
9713 return false;
9714 }
9715
9716 /* Vector mode? */
9717
9718 *cost = LIBCALL_COST (2);
9719 return false;
9720
9721 case PLUS:
9722 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9723 && (mode == SFmode || !TARGET_VFP_SINGLE))
9724 {
9725 if (GET_CODE (XEXP (x, 0)) == MULT)
9726 {
9727 rtx mul_op0, mul_op1, add_op;
9728
9729 if (speed_p)
9730 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9731
9732 mul_op0 = XEXP (XEXP (x, 0), 0);
9733 mul_op1 = XEXP (XEXP (x, 0), 1);
9734 add_op = XEXP (x, 1);
9735
9736 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9737 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9738 + rtx_cost (add_op, mode, code, 0, speed_p));
9739
9740 return true;
9741 }
9742
9743 if (speed_p)
9744 *cost += extra_cost->fp[mode != SFmode].addsub;
9745 return false;
9746 }
9747 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9748 {
9749 *cost = LIBCALL_COST (2);
9750 return false;
9751 }
9752
9753 /* Narrow modes can be synthesized in SImode, but the range
9754 of useful sub-operations is limited. Check for shift operations
9755 on one of the operands. Only left shifts can be used in the
9756 narrow modes. */
9757 if (GET_MODE_CLASS (mode) == MODE_INT
9758 && GET_MODE_SIZE (mode) < 4)
9759 {
9760 rtx shift_op, shift_reg;
9761 shift_reg = NULL;
9762
9763 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9764
9765 if (CONST_INT_P (XEXP (x, 1)))
9766 {
9767 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9768 INTVAL (XEXP (x, 1)), NULL_RTX,
9769 NULL_RTX, 1, 0);
9770 *cost = COSTS_N_INSNS (insns);
9771 if (speed_p)
9772 *cost += insns * extra_cost->alu.arith;
9773 /* Slightly penalize a narrow operation as the result may
9774 need widening. */
9775 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9776 return true;
9777 }
9778
9779 /* Slightly penalize a narrow operation as the result may
9780 need widening. */
9781 *cost += 1;
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9784
9785 return false;
9786 }
9787
9788 if (mode == SImode)
9789 {
9790 rtx shift_op, shift_reg;
9791
9792 if (TARGET_INT_SIMD
9793 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9794 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9795 {
9796 /* UXTA[BH] or SXTA[BH]. */
9797 if (speed_p)
9798 *cost += extra_cost->alu.extend_arith;
9799 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9800 0, speed_p)
9801 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9802 return true;
9803 }
9804
9805 shift_reg = NULL;
9806 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9807 if (shift_op != NULL)
9808 {
9809 if (shift_reg)
9810 {
9811 if (speed_p)
9812 *cost += extra_cost->alu.arith_shift_reg;
9813 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9814 }
9815 else if (speed_p)
9816 *cost += extra_cost->alu.arith_shift;
9817
9818 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9819 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9820 return true;
9821 }
9822 if (GET_CODE (XEXP (x, 0)) == MULT)
9823 {
9824 rtx mul_op = XEXP (x, 0);
9825
9826 if (TARGET_DSP_MULTIPLY
9827 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9828 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9829 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9830 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9831 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9832 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9833 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9834 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9835 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9836 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9837 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9838 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9839 == 16))))))
9840 {
9841 /* SMLA[BT][BT]. */
9842 if (speed_p)
9843 *cost += extra_cost->mult[0].extend_add;
9844 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9845 SIGN_EXTEND, 0, speed_p)
9846 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9847 SIGN_EXTEND, 0, speed_p)
9848 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9849 return true;
9850 }
9851
9852 if (speed_p)
9853 *cost += extra_cost->mult[0].add;
9854 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9855 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9856 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9857 return true;
9858 }
9859 if (CONST_INT_P (XEXP (x, 1)))
9860 {
9861 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9862 INTVAL (XEXP (x, 1)), NULL_RTX,
9863 NULL_RTX, 1, 0);
9864 *cost = COSTS_N_INSNS (insns);
9865 if (speed_p)
9866 *cost += insns * extra_cost->alu.arith;
9867 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9868 return true;
9869 }
9870 else if (speed_p)
9871 *cost += extra_cost->alu.arith;
9872
9873 return false;
9874 }
9875
9876 if (mode == DImode)
9877 {
9878 if (arm_arch3m
9879 && GET_CODE (XEXP (x, 0)) == MULT
9880 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9881 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9882 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9883 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9884 {
9885 if (speed_p)
9886 *cost += extra_cost->mult[1].extend_add;
9887 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9888 ZERO_EXTEND, 0, speed_p)
9889 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9890 ZERO_EXTEND, 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9892 return true;
9893 }
9894
9895 *cost += COSTS_N_INSNS (1);
9896
9897 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9898 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9899 {
9900 if (speed_p)
9901 *cost += (extra_cost->alu.arith
9902 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9903 ? extra_cost->alu.arith
9904 : extra_cost->alu.arith_shift));
9905
9906 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9907 0, speed_p)
9908 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9909 return true;
9910 }
9911
9912 if (speed_p)
9913 *cost += 2 * extra_cost->alu.arith;
9914 return false;
9915 }
9916
9917 /* Vector mode? */
9918 *cost = LIBCALL_COST (2);
9919 return false;
9920 case IOR:
9921 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9922 {
9923 if (speed_p)
9924 *cost += extra_cost->alu.rev;
9925
9926 return true;
9927 }
9928 /* Fall through. */
9929 case AND: case XOR:
9930 if (mode == SImode)
9931 {
9932 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9933 rtx op0 = XEXP (x, 0);
9934 rtx shift_op, shift_reg;
9935
9936 if (subcode == NOT
9937 && (code == AND
9938 || (code == IOR && TARGET_THUMB2)))
9939 op0 = XEXP (op0, 0);
9940
9941 shift_reg = NULL;
9942 shift_op = shifter_op_p (op0, &shift_reg);
9943 if (shift_op != NULL)
9944 {
9945 if (shift_reg)
9946 {
9947 if (speed_p)
9948 *cost += extra_cost->alu.log_shift_reg;
9949 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9950 }
9951 else if (speed_p)
9952 *cost += extra_cost->alu.log_shift;
9953
9954 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9955 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9956 return true;
9957 }
9958
9959 if (CONST_INT_P (XEXP (x, 1)))
9960 {
9961 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9962 INTVAL (XEXP (x, 1)), NULL_RTX,
9963 NULL_RTX, 1, 0);
9964
9965 *cost = COSTS_N_INSNS (insns);
9966 if (speed_p)
9967 *cost += insns * extra_cost->alu.logical;
9968 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9969 return true;
9970 }
9971
9972 if (speed_p)
9973 *cost += extra_cost->alu.logical;
9974 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9975 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9976 return true;
9977 }
9978
9979 if (mode == DImode)
9980 {
9981 rtx op0 = XEXP (x, 0);
9982 enum rtx_code subcode = GET_CODE (op0);
9983
9984 *cost += COSTS_N_INSNS (1);
9985
9986 if (subcode == NOT
9987 && (code == AND
9988 || (code == IOR && TARGET_THUMB2)))
9989 op0 = XEXP (op0, 0);
9990
9991 if (GET_CODE (op0) == ZERO_EXTEND)
9992 {
9993 if (speed_p)
9994 *cost += 2 * extra_cost->alu.logical;
9995
9996 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9997 0, speed_p)
9998 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9999 return true;
10000 }
10001 else if (GET_CODE (op0) == SIGN_EXTEND)
10002 {
10003 if (speed_p)
10004 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10005
10006 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10007 0, speed_p)
10008 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10009 return true;
10010 }
10011
10012 if (speed_p)
10013 *cost += 2 * extra_cost->alu.logical;
10014
10015 return true;
10016 }
10017 /* Vector mode? */
10018
10019 *cost = LIBCALL_COST (2);
10020 return false;
10021
10022 case MULT:
10023 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10024 && (mode == SFmode || !TARGET_VFP_SINGLE))
10025 {
10026 rtx op0 = XEXP (x, 0);
10027
10028 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10029 op0 = XEXP (op0, 0);
10030
10031 if (speed_p)
10032 *cost += extra_cost->fp[mode != SFmode].mult;
10033
10034 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10035 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10036 return true;
10037 }
10038 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10039 {
10040 *cost = LIBCALL_COST (2);
10041 return false;
10042 }
10043
10044 if (mode == SImode)
10045 {
10046 if (TARGET_DSP_MULTIPLY
10047 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10048 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10049 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10050 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10051 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10052 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10053 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10054 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10055 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10056 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10057 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10058 && (INTVAL (XEXP (XEXP (x, 1), 1))
10059 == 16))))))
10060 {
10061 /* SMUL[TB][TB]. */
10062 if (speed_p)
10063 *cost += extra_cost->mult[0].extend;
10064 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10065 SIGN_EXTEND, 0, speed_p);
10066 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10067 SIGN_EXTEND, 1, speed_p);
10068 return true;
10069 }
10070 if (speed_p)
10071 *cost += extra_cost->mult[0].simple;
10072 return false;
10073 }
10074
10075 if (mode == DImode)
10076 {
10077 if (arm_arch3m
10078 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10079 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10080 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10081 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10082 {
10083 if (speed_p)
10084 *cost += extra_cost->mult[1].extend;
10085 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10086 ZERO_EXTEND, 0, speed_p)
10087 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10088 ZERO_EXTEND, 0, speed_p));
10089 return true;
10090 }
10091
10092 *cost = LIBCALL_COST (2);
10093 return false;
10094 }
10095
10096 /* Vector mode? */
10097 *cost = LIBCALL_COST (2);
10098 return false;
10099
10100 case NEG:
10101 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10102 && (mode == SFmode || !TARGET_VFP_SINGLE))
10103 {
10104 if (GET_CODE (XEXP (x, 0)) == MULT)
10105 {
10106 /* VNMUL. */
10107 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10108 return true;
10109 }
10110
10111 if (speed_p)
10112 *cost += extra_cost->fp[mode != SFmode].neg;
10113
10114 return false;
10115 }
10116 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10117 {
10118 *cost = LIBCALL_COST (1);
10119 return false;
10120 }
10121
10122 if (mode == SImode)
10123 {
10124 if (GET_CODE (XEXP (x, 0)) == ABS)
10125 {
10126 *cost += COSTS_N_INSNS (1);
10127 /* Assume the non-flag-changing variant. */
10128 if (speed_p)
10129 *cost += (extra_cost->alu.log_shift
10130 + extra_cost->alu.arith_shift);
10131 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10132 return true;
10133 }
10134
10135 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10136 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10137 {
10138 *cost += COSTS_N_INSNS (1);
10139 /* No extra cost for MOV imm and MVN imm. */
10140 /* If the comparison op is using the flags, there's no further
10141 cost; otherwise we need to add the cost of the comparison. */
10142 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10143 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10144 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10145 {
10146 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10147 *cost += (COSTS_N_INSNS (1)
10148 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10149 0, speed_p)
10150 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10151 1, speed_p));
10152 if (speed_p)
10153 *cost += extra_cost->alu.arith;
10154 }
10155 return true;
10156 }
10157
10158 if (speed_p)
10159 *cost += extra_cost->alu.arith;
10160 return false;
10161 }
10162
10163 if (GET_MODE_CLASS (mode) == MODE_INT
10164 && GET_MODE_SIZE (mode) < 4)
10165 {
10166 /* Slightly disparage, as we might need an extend operation. */
10167 *cost += 1;
10168 if (speed_p)
10169 *cost += extra_cost->alu.arith;
10170 return false;
10171 }
10172
10173 if (mode == DImode)
10174 {
10175 *cost += COSTS_N_INSNS (1);
10176 if (speed_p)
10177 *cost += 2 * extra_cost->alu.arith;
10178 return false;
10179 }
10180
10181 /* Vector mode? */
10182 *cost = LIBCALL_COST (1);
10183 return false;
10184
10185 case NOT:
10186 if (mode == SImode)
10187 {
10188 rtx shift_op;
10189 rtx shift_reg = NULL;
10190
10191 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10192
10193 if (shift_op)
10194 {
10195 if (shift_reg != NULL)
10196 {
10197 if (speed_p)
10198 *cost += extra_cost->alu.log_shift_reg;
10199 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10200 }
10201 else if (speed_p)
10202 *cost += extra_cost->alu.log_shift;
10203 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10204 return true;
10205 }
10206
10207 if (speed_p)
10208 *cost += extra_cost->alu.logical;
10209 return false;
10210 }
10211 if (mode == DImode)
10212 {
10213 *cost += COSTS_N_INSNS (1);
10214 return false;
10215 }
10216
10217 /* Vector mode? */
10218
10219 *cost += LIBCALL_COST (1);
10220 return false;
10221
10222 case IF_THEN_ELSE:
10223 {
10224 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10225 {
10226 *cost += COSTS_N_INSNS (3);
10227 return true;
10228 }
10229 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10230 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10231
10232 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10233 /* Assume that if one arm of the if_then_else is a register,
10234 that it will be tied with the result and eliminate the
10235 conditional insn. */
10236 if (REG_P (XEXP (x, 1)))
10237 *cost += op2cost;
10238 else if (REG_P (XEXP (x, 2)))
10239 *cost += op1cost;
10240 else
10241 {
10242 if (speed_p)
10243 {
10244 if (extra_cost->alu.non_exec_costs_exec)
10245 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10246 else
10247 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10248 }
10249 else
10250 *cost += op1cost + op2cost;
10251 }
10252 }
10253 return true;
10254
10255 case COMPARE:
10256 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10257 *cost = 0;
10258 else
10259 {
10260 machine_mode op0mode;
10261 /* We'll mostly assume that the cost of a compare is the cost of the
10262 LHS. However, there are some notable exceptions. */
10263
10264 /* Floating point compares are never done as side-effects. */
10265 op0mode = GET_MODE (XEXP (x, 0));
10266 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10267 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10268 {
10269 if (speed_p)
10270 *cost += extra_cost->fp[op0mode != SFmode].compare;
10271
10272 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10273 {
10274 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10275 return true;
10276 }
10277
10278 return false;
10279 }
10280 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10281 {
10282 *cost = LIBCALL_COST (2);
10283 return false;
10284 }
10285
10286 /* DImode compares normally take two insns. */
10287 if (op0mode == DImode)
10288 {
10289 *cost += COSTS_N_INSNS (1);
10290 if (speed_p)
10291 *cost += 2 * extra_cost->alu.arith;
10292 return false;
10293 }
10294
10295 if (op0mode == SImode)
10296 {
10297 rtx shift_op;
10298 rtx shift_reg;
10299
10300 if (XEXP (x, 1) == const0_rtx
10301 && !(REG_P (XEXP (x, 0))
10302 || (GET_CODE (XEXP (x, 0)) == SUBREG
10303 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10304 {
10305 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10306
10307 /* Multiply operations that set the flags are often
10308 significantly more expensive. */
10309 if (speed_p
10310 && GET_CODE (XEXP (x, 0)) == MULT
10311 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10312 *cost += extra_cost->mult[0].flag_setting;
10313
10314 if (speed_p
10315 && GET_CODE (XEXP (x, 0)) == PLUS
10316 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10317 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10318 0), 1), mode))
10319 *cost += extra_cost->mult[0].flag_setting;
10320 return true;
10321 }
10322
10323 shift_reg = NULL;
10324 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10325 if (shift_op != NULL)
10326 {
10327 if (shift_reg != NULL)
10328 {
10329 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10330 1, speed_p);
10331 if (speed_p)
10332 *cost += extra_cost->alu.arith_shift_reg;
10333 }
10334 else if (speed_p)
10335 *cost += extra_cost->alu.arith_shift;
10336 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10337 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10338 return true;
10339 }
10340
10341 if (speed_p)
10342 *cost += extra_cost->alu.arith;
10343 if (CONST_INT_P (XEXP (x, 1))
10344 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10345 {
10346 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10347 return true;
10348 }
10349 return false;
10350 }
10351
10352 /* Vector mode? */
10353
10354 *cost = LIBCALL_COST (2);
10355 return false;
10356 }
10357 return true;
10358
10359 case EQ:
10360 case NE:
10361 case LT:
10362 case LE:
10363 case GT:
10364 case GE:
10365 case LTU:
10366 case LEU:
10367 case GEU:
10368 case GTU:
10369 case ORDERED:
10370 case UNORDERED:
10371 case UNEQ:
10372 case UNLE:
10373 case UNLT:
10374 case UNGE:
10375 case UNGT:
10376 case LTGT:
10377 if (outer_code == SET)
10378 {
10379 /* Is it a store-flag operation? */
10380 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10381 && XEXP (x, 1) == const0_rtx)
10382 {
10383 /* Thumb also needs an IT insn. */
10384 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10385 return true;
10386 }
10387 if (XEXP (x, 1) == const0_rtx)
10388 {
10389 switch (code)
10390 {
10391 case LT:
10392 /* LSR Rd, Rn, #31. */
10393 if (speed_p)
10394 *cost += extra_cost->alu.shift;
10395 break;
10396
10397 case EQ:
10398 /* RSBS T1, Rn, #0
10399 ADC Rd, Rn, T1. */
10400
10401 case NE:
10402 /* SUBS T1, Rn, #1
10403 SBC Rd, Rn, T1. */
10404 *cost += COSTS_N_INSNS (1);
10405 break;
10406
10407 case LE:
10408 /* RSBS T1, Rn, Rn, LSR #31
10409 ADC Rd, Rn, T1. */
10410 *cost += COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->alu.arith_shift;
10413 break;
10414
10415 case GT:
10416 /* RSB Rd, Rn, Rn, ASR #1
10417 LSR Rd, Rd, #31. */
10418 *cost += COSTS_N_INSNS (1);
10419 if (speed_p)
10420 *cost += (extra_cost->alu.arith_shift
10421 + extra_cost->alu.shift);
10422 break;
10423
10424 case GE:
10425 /* ASR Rd, Rn, #31
10426 ADD Rd, Rn, #1. */
10427 *cost += COSTS_N_INSNS (1);
10428 if (speed_p)
10429 *cost += extra_cost->alu.shift;
10430 break;
10431
10432 default:
10433 /* Remaining cases are either meaningless or would take
10434 three insns anyway. */
10435 *cost = COSTS_N_INSNS (3);
10436 break;
10437 }
10438 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10439 return true;
10440 }
10441 else
10442 {
10443 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10444 if (CONST_INT_P (XEXP (x, 1))
10445 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10446 {
10447 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10448 return true;
10449 }
10450
10451 return false;
10452 }
10453 }
10454 /* Not directly inside a set. If it involves the condition code
10455 register it must be the condition for a branch, cond_exec or
10456 if_then_else operation. Since the comparison is performed elsewhere,
10457 this is just the control part, which has no additional
10458 cost. */
10459 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10460 && XEXP (x, 1) == const0_rtx)
10461 {
10462 *cost = 0;
10463 return true;
10464 }
10465 return false;
10466
10467 case ABS:
10468 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10469 && (mode == SFmode || !TARGET_VFP_SINGLE))
10470 {
10471 if (speed_p)
10472 *cost += extra_cost->fp[mode != SFmode].neg;
10473
10474 return false;
10475 }
10476 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10477 {
10478 *cost = LIBCALL_COST (1);
10479 return false;
10480 }
10481
10482 if (mode == SImode)
10483 {
10484 if (speed_p)
10485 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10486 return false;
10487 }
10488 /* Vector mode? */
10489 *cost = LIBCALL_COST (1);
10490 return false;
10491
10492 case SIGN_EXTEND:
10493 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10494 && MEM_P (XEXP (x, 0)))
10495 {
10496 if (mode == DImode)
10497 *cost += COSTS_N_INSNS (1);
10498
10499 if (!speed_p)
10500 return true;
10501
10502 if (GET_MODE (XEXP (x, 0)) == SImode)
10503 *cost += extra_cost->ldst.load;
10504 else
10505 *cost += extra_cost->ldst.load_sign_extend;
10506
10507 if (mode == DImode)
10508 *cost += extra_cost->alu.shift;
10509
10510 return true;
10511 }
10512
10513 /* Widening from less than 32 bits requires an extend operation. */
10514 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10515 {
10516 /* We have SXTB/SXTH. */
10517 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10518 if (speed_p)
10519 *cost += extra_cost->alu.extend;
10520 }
10521 else if (GET_MODE (XEXP (x, 0)) != SImode)
10522 {
10523 /* Needs two shifts. */
10524 *cost += COSTS_N_INSNS (1);
10525 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10526 if (speed_p)
10527 *cost += 2 * extra_cost->alu.shift;
10528 }
10529
10530 /* Widening beyond 32 bits requires one more insn. */
10531 if (mode == DImode)
10532 {
10533 *cost += COSTS_N_INSNS (1);
10534 if (speed_p)
10535 *cost += extra_cost->alu.shift;
10536 }
10537
10538 return true;
10539
10540 case ZERO_EXTEND:
10541 if ((arm_arch4
10542 || GET_MODE (XEXP (x, 0)) == SImode
10543 || GET_MODE (XEXP (x, 0)) == QImode)
10544 && MEM_P (XEXP (x, 0)))
10545 {
10546 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10547
10548 if (mode == DImode)
10549 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10550
10551 return true;
10552 }
10553
10554 /* Widening from less than 32 bits requires an extend operation. */
10555 if (GET_MODE (XEXP (x, 0)) == QImode)
10556 {
10557 /* UXTB can be a shorter instruction in Thumb2, but it might
10558 be slower than the AND Rd, Rn, #255 alternative. When
10559 optimizing for speed it should never be slower to use
10560 AND, and we don't really model 16-bit vs 32-bit insns
10561 here. */
10562 if (speed_p)
10563 *cost += extra_cost->alu.logical;
10564 }
10565 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10566 {
10567 /* We have UXTB/UXTH. */
10568 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10569 if (speed_p)
10570 *cost += extra_cost->alu.extend;
10571 }
10572 else if (GET_MODE (XEXP (x, 0)) != SImode)
10573 {
10574 /* Needs two shifts. It's marginally preferable to use
10575 shifts rather than two BIC instructions as the second
10576 shift may merge with a subsequent insn as a shifter
10577 op. */
10578 *cost = COSTS_N_INSNS (2);
10579 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10580 if (speed_p)
10581 *cost += 2 * extra_cost->alu.shift;
10582 }
10583
10584 /* Widening beyond 32 bits requires one more insn. */
10585 if (mode == DImode)
10586 {
10587 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10588 }
10589
10590 return true;
10591
10592 case CONST_INT:
10593 *cost = 0;
10594 /* CONST_INT has no mode, so we cannot tell for sure how many
10595 insns are really going to be needed. The best we can do is
10596 look at the value passed. If it fits in SImode, then assume
10597 that's the mode it will be used for. Otherwise assume it
10598 will be used in DImode. */
10599 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10600 mode = SImode;
10601 else
10602 mode = DImode;
10603
10604 /* Avoid blowing up in arm_gen_constant (). */
10605 if (!(outer_code == PLUS
10606 || outer_code == AND
10607 || outer_code == IOR
10608 || outer_code == XOR
10609 || outer_code == MINUS))
10610 outer_code = SET;
10611
10612 const_int_cost:
10613 if (mode == SImode)
10614 {
10615 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10616 INTVAL (x), NULL, NULL,
10617 0, 0));
10618 /* Extra costs? */
10619 }
10620 else
10621 {
10622 *cost += COSTS_N_INSNS (arm_gen_constant
10623 (outer_code, SImode, NULL,
10624 trunc_int_for_mode (INTVAL (x), SImode),
10625 NULL, NULL, 0, 0)
10626 + arm_gen_constant (outer_code, SImode, NULL,
10627 INTVAL (x) >> 32, NULL,
10628 NULL, 0, 0));
10629 /* Extra costs? */
10630 }
10631
10632 return true;
10633
10634 case CONST:
10635 case LABEL_REF:
10636 case SYMBOL_REF:
10637 if (speed_p)
10638 {
10639 if (arm_arch_thumb2 && !flag_pic)
10640 *cost += COSTS_N_INSNS (1);
10641 else
10642 *cost += extra_cost->ldst.load;
10643 }
10644 else
10645 *cost += COSTS_N_INSNS (1);
10646
10647 if (flag_pic)
10648 {
10649 *cost += COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += extra_cost->alu.arith;
10652 }
10653
10654 return true;
10655
10656 case CONST_FIXED:
10657 *cost = COSTS_N_INSNS (4);
10658 /* Fixme. */
10659 return true;
10660
10661 case CONST_DOUBLE:
10662 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10663 && (mode == SFmode || !TARGET_VFP_SINGLE))
10664 {
10665 if (vfp3_const_double_rtx (x))
10666 {
10667 if (speed_p)
10668 *cost += extra_cost->fp[mode == DFmode].fpconst;
10669 return true;
10670 }
10671
10672 if (speed_p)
10673 {
10674 if (mode == DFmode)
10675 *cost += extra_cost->ldst.loadd;
10676 else
10677 *cost += extra_cost->ldst.loadf;
10678 }
10679 else
10680 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10681
10682 return true;
10683 }
10684 *cost = COSTS_N_INSNS (4);
10685 return true;
10686
10687 case CONST_VECTOR:
10688 /* Fixme. */
10689 if (TARGET_NEON
10690 && TARGET_HARD_FLOAT
10691 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10692 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10693 *cost = COSTS_N_INSNS (1);
10694 else
10695 *cost = COSTS_N_INSNS (4);
10696 return true;
10697
10698 case HIGH:
10699 case LO_SUM:
10700 /* When optimizing for size, we prefer constant pool entries to
10701 MOVW/MOVT pairs, so bump the cost of these slightly. */
10702 if (!speed_p)
10703 *cost += 1;
10704 return true;
10705
10706 case CLZ:
10707 if (speed_p)
10708 *cost += extra_cost->alu.clz;
10709 return false;
10710
10711 case SMIN:
10712 if (XEXP (x, 1) == const0_rtx)
10713 {
10714 if (speed_p)
10715 *cost += extra_cost->alu.log_shift;
10716 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10717 return true;
10718 }
10719 /* Fall through. */
10720 case SMAX:
10721 case UMIN:
10722 case UMAX:
10723 *cost += COSTS_N_INSNS (1);
10724 return false;
10725
10726 case TRUNCATE:
10727 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10728 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10729 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10731 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10732 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10733 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10734 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10735 == ZERO_EXTEND))))
10736 {
10737 if (speed_p)
10738 *cost += extra_cost->mult[1].extend;
10739 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10740 ZERO_EXTEND, 0, speed_p)
10741 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10742 ZERO_EXTEND, 0, speed_p));
10743 return true;
10744 }
10745 *cost = LIBCALL_COST (1);
10746 return false;
10747
10748 case UNSPEC_VOLATILE:
10749 case UNSPEC:
10750 return arm_unspec_cost (x, outer_code, speed_p, cost);
10751
10752 case PC:
10753 /* Reading the PC is like reading any other register. Writing it
10754 is more expensive, but we take that into account elsewhere. */
10755 *cost = 0;
10756 return true;
10757
10758 case ZERO_EXTRACT:
10759 /* TODO: Simple zero_extract of bottom bits using AND. */
10760 /* Fall through. */
10761 case SIGN_EXTRACT:
10762 if (arm_arch6
10763 && mode == SImode
10764 && CONST_INT_P (XEXP (x, 1))
10765 && CONST_INT_P (XEXP (x, 2)))
10766 {
10767 if (speed_p)
10768 *cost += extra_cost->alu.bfx;
10769 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10770 return true;
10771 }
10772 /* Without UBFX/SBFX, need to resort to shift operations. */
10773 *cost += COSTS_N_INSNS (1);
10774 if (speed_p)
10775 *cost += 2 * extra_cost->alu.shift;
10776 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10777 return true;
10778
10779 case FLOAT_EXTEND:
10780 if (TARGET_HARD_FLOAT)
10781 {
10782 if (speed_p)
10783 *cost += extra_cost->fp[mode == DFmode].widen;
10784 if (!TARGET_VFP5
10785 && GET_MODE (XEXP (x, 0)) == HFmode)
10786 {
10787 /* Pre v8, widening HF->DF is a two-step process, first
10788 widening to SFmode. */
10789 *cost += COSTS_N_INSNS (1);
10790 if (speed_p)
10791 *cost += extra_cost->fp[0].widen;
10792 }
10793 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10794 return true;
10795 }
10796
10797 *cost = LIBCALL_COST (1);
10798 return false;
10799
10800 case FLOAT_TRUNCATE:
10801 if (TARGET_HARD_FLOAT)
10802 {
10803 if (speed_p)
10804 *cost += extra_cost->fp[mode == DFmode].narrow;
10805 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10806 return true;
10807 /* Vector modes? */
10808 }
10809 *cost = LIBCALL_COST (1);
10810 return false;
10811
10812 case FMA:
10813 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10814 {
10815 rtx op0 = XEXP (x, 0);
10816 rtx op1 = XEXP (x, 1);
10817 rtx op2 = XEXP (x, 2);
10818
10819
10820 /* vfms or vfnma. */
10821 if (GET_CODE (op0) == NEG)
10822 op0 = XEXP (op0, 0);
10823
10824 /* vfnms or vfnma. */
10825 if (GET_CODE (op2) == NEG)
10826 op2 = XEXP (op2, 0);
10827
10828 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10829 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10830 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10831
10832 if (speed_p)
10833 *cost += extra_cost->fp[mode == DFmode].fma;
10834
10835 return true;
10836 }
10837
10838 *cost = LIBCALL_COST (3);
10839 return false;
10840
10841 case FIX:
10842 case UNSIGNED_FIX:
10843 if (TARGET_HARD_FLOAT)
10844 {
10845 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10846 a vcvt fixed-point conversion. */
10847 if (code == FIX && mode == SImode
10848 && GET_CODE (XEXP (x, 0)) == FIX
10849 && GET_MODE (XEXP (x, 0)) == SFmode
10850 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10851 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10852 > 0)
10853 {
10854 if (speed_p)
10855 *cost += extra_cost->fp[0].toint;
10856
10857 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10858 code, 0, speed_p);
10859 return true;
10860 }
10861
10862 if (GET_MODE_CLASS (mode) == MODE_INT)
10863 {
10864 mode = GET_MODE (XEXP (x, 0));
10865 if (speed_p)
10866 *cost += extra_cost->fp[mode == DFmode].toint;
10867 /* Strip off the 'cost' of rounding towards zero. */
10868 if (GET_CODE (XEXP (x, 0)) == FIX)
10869 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10870 0, speed_p);
10871 else
10872 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10873 /* ??? Increase the cost to deal with transferring from
10874 FP -> CORE registers? */
10875 return true;
10876 }
10877 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10878 && TARGET_VFP5)
10879 {
10880 if (speed_p)
10881 *cost += extra_cost->fp[mode == DFmode].roundint;
10882 return false;
10883 }
10884 /* Vector costs? */
10885 }
10886 *cost = LIBCALL_COST (1);
10887 return false;
10888
10889 case FLOAT:
10890 case UNSIGNED_FLOAT:
10891 if (TARGET_HARD_FLOAT)
10892 {
10893 /* ??? Increase the cost to deal with transferring from CORE
10894 -> FP registers? */
10895 if (speed_p)
10896 *cost += extra_cost->fp[mode == DFmode].fromint;
10897 return false;
10898 }
10899 *cost = LIBCALL_COST (1);
10900 return false;
10901
10902 case CALL:
10903 return true;
10904
10905 case ASM_OPERANDS:
10906 {
10907 /* Just a guess. Guess number of instructions in the asm
10908 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10909 though (see PR60663). */
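	/* E.g. an asm template containing three instructions with two inputs
	   is costed as COSTS_N_INSNS (5).  */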
10910 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10911 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10912
10913 *cost = COSTS_N_INSNS (asm_length + num_operands);
10914 return true;
10915 }
10916 default:
10917 if (mode != VOIDmode)
10918 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10919 else
10920 *cost = COSTS_N_INSNS (4); /* Who knows? */
10921 return false;
10922 }
10923 }
10924
10925 #undef HANDLE_NARROW_SHIFT_ARITH
10926
10927 /* RTX costs entry point. */
10928
10929 static bool
10930 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10931 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10932 {
10933 bool result;
10934 int code = GET_CODE (x);
10935 gcc_assert (current_tune->insn_extra_cost);
10936
10937 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10938 (enum rtx_code) outer_code,
10939 current_tune->insn_extra_cost,
10940 total, speed);
10941
10942 if (dump_file && (dump_flags & TDF_DETAILS))
10943 {
10944 print_rtl_single (dump_file, x);
10945 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10946 *total, result ? "final" : "partial");
10947 }
10948 return result;
10949 }
10950
10951 /* All address computations that can be done are free, but rtx cost returns
10952 the same for practically all of them. So we weight the different types
10953 of address here in the order (most pref first):
10954 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
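   /* With the weights below, for instance, a POST_INC address costs 0,
      [reg, #imm] costs 2, [reg, reg] costs 4 and a bare SYMBOL_REF costs 10.  */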
10955 static inline int
10956 arm_arm_address_cost (rtx x)
10957 {
10958 enum rtx_code c = GET_CODE (x);
10959
10960 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10961 return 0;
10962 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10963 return 10;
10964
10965 if (c == PLUS)
10966 {
10967 if (CONST_INT_P (XEXP (x, 1)))
10968 return 2;
10969
10970 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10971 return 3;
10972
10973 return 4;
10974 }
10975
10976 return 6;
10977 }
10978
10979 static inline int
10980 arm_thumb_address_cost (rtx x)
10981 {
10982 enum rtx_code c = GET_CODE (x);
10983
10984 if (c == REG)
10985 return 1;
10986 if (c == PLUS
10987 && REG_P (XEXP (x, 0))
10988 && CONST_INT_P (XEXP (x, 1)))
10989 return 1;
10990
10991 return 2;
10992 }
10993
10994 static int
10995 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10996 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10997 {
10998 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10999 }
11000
11001 /* Adjust cost hook for XScale. */
11002 static bool
11003 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11004 int * cost)
11005 {
11006 /* Some true dependencies can have a higher cost depending
11007 on precisely how certain input operands are used. */
11008 if (dep_type == 0
11009 && recog_memoized (insn) >= 0
11010 && recog_memoized (dep) >= 0)
11011 {
11012 int shift_opnum = get_attr_shift (insn);
11013 enum attr_type attr_type = get_attr_type (dep);
11014
11015 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11016 operand for INSN. If we have a shifted input operand and the
11017 instruction we depend on is another ALU instruction, then we may
11018 have to account for an additional stall. */
11019 if (shift_opnum != 0
11020 && (attr_type == TYPE_ALU_SHIFT_IMM
11021 || attr_type == TYPE_ALUS_SHIFT_IMM
11022 || attr_type == TYPE_LOGIC_SHIFT_IMM
11023 || attr_type == TYPE_LOGICS_SHIFT_IMM
11024 || attr_type == TYPE_ALU_SHIFT_REG
11025 || attr_type == TYPE_ALUS_SHIFT_REG
11026 || attr_type == TYPE_LOGIC_SHIFT_REG
11027 || attr_type == TYPE_LOGICS_SHIFT_REG
11028 || attr_type == TYPE_MOV_SHIFT
11029 || attr_type == TYPE_MVN_SHIFT
11030 || attr_type == TYPE_MOV_SHIFT_REG
11031 || attr_type == TYPE_MVN_SHIFT_REG))
11032 {
11033 rtx shifted_operand;
11034 int opno;
11035
11036 /* Get the shifted operand. */
11037 extract_insn (insn);
11038 shifted_operand = recog_data.operand[shift_opnum];
11039
11040 /* Iterate over all the operands in DEP. If we write an operand
11041 that overlaps with SHIFTED_OPERAND, then we have to increase the
11042 cost of this dependency. */
11043 extract_insn (dep);
11044 preprocess_constraints (dep);
11045 for (opno = 0; opno < recog_data.n_operands; opno++)
11046 {
11047 /* We can ignore strict inputs. */
11048 if (recog_data.operand_type[opno] == OP_IN)
11049 continue;
11050
11051 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11052 shifted_operand))
11053 {
11054 *cost = 2;
11055 return false;
11056 }
11057 }
11058 }
11059 }
11060 return true;
11061 }
11062
11063 /* Adjust cost hook for Cortex A9. */
11064 static bool
11065 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11066 int * cost)
11067 {
11068 switch (dep_type)
11069 {
11070 case REG_DEP_ANTI:
11071 *cost = 0;
11072 return false;
11073
11074 case REG_DEP_TRUE:
11075 case REG_DEP_OUTPUT:
11076 if (recog_memoized (insn) >= 0
11077 && recog_memoized (dep) >= 0)
11078 {
11079 if (GET_CODE (PATTERN (insn)) == SET)
11080 {
11081 if (GET_MODE_CLASS
11082 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11083 || GET_MODE_CLASS
11084 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11085 {
11086 enum attr_type attr_type_insn = get_attr_type (insn);
11087 enum attr_type attr_type_dep = get_attr_type (dep);
11088
11089 /* By default all dependencies of the form
11090 s0 = s0 <op> s1
11091 s0 = s0 <op> s2
11092 have an extra latency of 1 cycle because
11093 of the input and output dependency in this
11094 case. However, this gets modeled as a true
11095 dependency, hence all these checks. */
11096 if (REG_P (SET_DEST (PATTERN (insn)))
11097 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11098 {
11099 /* FMACS is a special case where the dependent
11100 instruction can be issued 3 cycles before
11101 the normal latency in case of an output
11102 dependency. */
11103 if ((attr_type_insn == TYPE_FMACS
11104 || attr_type_insn == TYPE_FMACD)
11105 && (attr_type_dep == TYPE_FMACS
11106 || attr_type_dep == TYPE_FMACD))
11107 {
11108 if (dep_type == REG_DEP_OUTPUT)
11109 *cost = insn_default_latency (dep) - 3;
11110 else
11111 *cost = insn_default_latency (dep);
11112 return false;
11113 }
11114 else
11115 {
11116 if (dep_type == REG_DEP_OUTPUT)
11117 *cost = insn_default_latency (dep) + 1;
11118 else
11119 *cost = insn_default_latency (dep);
11120 }
11121 return false;
11122 }
11123 }
11124 }
11125 }
11126 break;
11127
11128 default:
11129 gcc_unreachable ();
11130 }
11131
11132 return true;
11133 }
11134
11135 /* Adjust cost hook for FA726TE. */
11136 static bool
11137 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11138 int * cost)
11139 {
11140 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11141 followed by a predicated one) has a penalty of 3. */
11142 if (dep_type == REG_DEP_TRUE
11143 && recog_memoized (insn) >= 0
11144 && recog_memoized (dep) >= 0
11145 && get_attr_conds (dep) == CONDS_SET)
11146 {
11147 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11148 if (get_attr_conds (insn) == CONDS_USE
11149 && get_attr_type (insn) != TYPE_BRANCH)
11150 {
11151 *cost = 3;
11152 return false;
11153 }
11154
11155 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11156 || get_attr_conds (insn) == CONDS_USE)
11157 {
11158 *cost = 0;
11159 return false;
11160 }
11161 }
11162
11163 return true;
11164 }
11165
11166 /* Implement TARGET_REGISTER_MOVE_COST.
11167
11168 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11169 such a move is typically more expensive than a single memory access. We set
11170 the cost to less than two memory accesses so that floating
11171 point to integer conversion does not go through memory. */
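   /* With the figures below a VFP<->core transfer is costed at 15, which is
      less than twice the TARGET_32BIT memory move cost of 10 returned by
      arm_memory_move_cost.  */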
11172
11173 int
11174 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11175 reg_class_t from, reg_class_t to)
11176 {
11177 if (TARGET_32BIT)
11178 {
11179 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11180 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11181 return 15;
11182 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11183 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11184 return 4;
11185 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11186 return 20;
11187 else
11188 return 2;
11189 }
11190 else
11191 {
11192 if (from == HI_REGS || to == HI_REGS)
11193 return 4;
11194 else
11195 return 2;
11196 }
11197 }
11198
11199 /* Implement TARGET_MEMORY_MOVE_COST. */
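   /* For TARGET_32BIT the cost is a flat 10.  On Thumb-1, for example, a
      DImode move costs (2 * 8) * 1 = 16 for LO_REGS and 32 for other classes.  */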
11200
11201 int
11202 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11203 bool in ATTRIBUTE_UNUSED)
11204 {
11205 if (TARGET_32BIT)
11206 return 10;
11207 else
11208 {
11209 if (GET_MODE_SIZE (mode) < 4)
11210 return 8;
11211 else
11212 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11213 }
11214 }
11215
11216 /* Vectorizer cost model implementation. */
11217
11218 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11219 static int
11220 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11221 tree vectype,
11222 int misalign ATTRIBUTE_UNUSED)
11223 {
11224 unsigned elements;
11225
11226 switch (type_of_cost)
11227 {
11228 case scalar_stmt:
11229 return current_tune->vec_costs->scalar_stmt_cost;
11230
11231 case scalar_load:
11232 return current_tune->vec_costs->scalar_load_cost;
11233
11234 case scalar_store:
11235 return current_tune->vec_costs->scalar_store_cost;
11236
11237 case vector_stmt:
11238 return current_tune->vec_costs->vec_stmt_cost;
11239
11240 case vector_load:
11241 return current_tune->vec_costs->vec_align_load_cost;
11242
11243 case vector_store:
11244 return current_tune->vec_costs->vec_store_cost;
11245
11246 case vec_to_scalar:
11247 return current_tune->vec_costs->vec_to_scalar_cost;
11248
11249 case scalar_to_vec:
11250 return current_tune->vec_costs->scalar_to_vec_cost;
11251
11252 case unaligned_load:
11253 return current_tune->vec_costs->vec_unalign_load_cost;
11254
11255 case unaligned_store:
11256 return current_tune->vec_costs->vec_unalign_store_cost;
11257
11258 case cond_branch_taken:
11259 return current_tune->vec_costs->cond_taken_branch_cost;
11260
11261 case cond_branch_not_taken:
11262 return current_tune->vec_costs->cond_not_taken_branch_cost;
11263
11264 case vec_perm:
11265 case vec_promote_demote:
11266 return current_tune->vec_costs->vec_stmt_cost;
11267
11268 case vec_construct:
11269 elements = TYPE_VECTOR_SUBPARTS (vectype);
11270 return elements / 2 + 1;
11271
11272 default:
11273 gcc_unreachable ();
11274 }
11275 }
11276
11277 /* Implement targetm.vectorize.add_stmt_cost. */
11278
11279 static unsigned
11280 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11281 struct _stmt_vec_info *stmt_info, int misalign,
11282 enum vect_cost_model_location where)
11283 {
11284 unsigned *cost = (unsigned *) data;
11285 unsigned retval = 0;
11286
11287 if (flag_vect_cost_model)
11288 {
11289 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11290 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11291
11292 /* Statements in an inner loop relative to the loop being
11293 vectorized are weighted more heavily. The value here is
11294 arbitrary and could potentially be improved with analysis. */
11295 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11296 count *= 50; /* FIXME. */
11297
11298 retval = (unsigned) (count * stmt_cost);
11299 cost[where] += retval;
11300 }
11301
11302 return retval;
11303 }
11304
11305 /* Return true if and only if this insn can dual-issue only as older. */
11306 static bool
11307 cortexa7_older_only (rtx_insn *insn)
11308 {
11309 if (recog_memoized (insn) < 0)
11310 return false;
11311
11312 switch (get_attr_type (insn))
11313 {
11314 case TYPE_ALU_DSP_REG:
11315 case TYPE_ALU_SREG:
11316 case TYPE_ALUS_SREG:
11317 case TYPE_LOGIC_REG:
11318 case TYPE_LOGICS_REG:
11319 case TYPE_ADC_REG:
11320 case TYPE_ADCS_REG:
11321 case TYPE_ADR:
11322 case TYPE_BFM:
11323 case TYPE_REV:
11324 case TYPE_MVN_REG:
11325 case TYPE_SHIFT_IMM:
11326 case TYPE_SHIFT_REG:
11327 case TYPE_LOAD_BYTE:
11328 case TYPE_LOAD_4:
11329 case TYPE_STORE_4:
11330 case TYPE_FFARITHS:
11331 case TYPE_FADDS:
11332 case TYPE_FFARITHD:
11333 case TYPE_FADDD:
11334 case TYPE_FMOV:
11335 case TYPE_F_CVT:
11336 case TYPE_FCMPS:
11337 case TYPE_FCMPD:
11338 case TYPE_FCONSTS:
11339 case TYPE_FCONSTD:
11340 case TYPE_FMULS:
11341 case TYPE_FMACS:
11342 case TYPE_FMULD:
11343 case TYPE_FMACD:
11344 case TYPE_FDIVS:
11345 case TYPE_FDIVD:
11346 case TYPE_F_MRC:
11347 case TYPE_F_MRRC:
11348 case TYPE_F_FLAG:
11349 case TYPE_F_LOADS:
11350 case TYPE_F_STORES:
11351 return true;
11352 default:
11353 return false;
11354 }
11355 }
11356
11357 /* Return true if and only if this insn can dual-issue as younger. */
11358 static bool
11359 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11360 {
11361 if (recog_memoized (insn) < 0)
11362 {
11363 if (verbose > 5)
11364 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11365 return false;
11366 }
11367
11368 switch (get_attr_type (insn))
11369 {
11370 case TYPE_ALU_IMM:
11371 case TYPE_ALUS_IMM:
11372 case TYPE_LOGIC_IMM:
11373 case TYPE_LOGICS_IMM:
11374 case TYPE_EXTEND:
11375 case TYPE_MVN_IMM:
11376 case TYPE_MOV_IMM:
11377 case TYPE_MOV_REG:
11378 case TYPE_MOV_SHIFT:
11379 case TYPE_MOV_SHIFT_REG:
11380 case TYPE_BRANCH:
11381 case TYPE_CALL:
11382 return true;
11383 default:
11384 return false;
11385 }
11386 }
11387
11388
11389 /* Look for an instruction that can dual issue only as an older
11390 instruction, and move it in front of any instructions that can
11391 dual-issue as younger, while preserving the relative order of all
11392 other instructions in the ready list. This is a heuristic to help
11393 dual-issue in later cycles, by postponing issue of more flexible
11394 instructions. This heuristic may affect dual issue opportunities
11395 in the current cycle. */
11396 static void
11397 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11398 int *n_readyp, int clock)
11399 {
11400 int i;
11401 int first_older_only = -1, first_younger = -1;
11402
11403 if (verbose > 5)
11404 fprintf (file,
11405 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11406 clock,
11407 *n_readyp);
11408
11409 /* Traverse the ready list from the head (the instruction to issue
11410 first), looking for the first instruction that can issue as
11411 younger and the first instruction that can dual-issue only as
11412 older. */
11413 for (i = *n_readyp - 1; i >= 0; i--)
11414 {
11415 rtx_insn *insn = ready[i];
11416 if (cortexa7_older_only (insn))
11417 {
11418 first_older_only = i;
11419 if (verbose > 5)
11420 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11421 break;
11422 }
11423 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11424 first_younger = i;
11425 }
11426
11427 /* Nothing to reorder because either no younger insn was found, or an insn
11428 that can dual-issue only as older appears before any insn that
11429 can dual-issue as younger. */
11430 if (first_younger == -1)
11431 {
11432 if (verbose > 5)
11433 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11434 return;
11435 }
11436
11437 /* Nothing to reorder because no older-only insn in the ready list. */
11438 if (first_older_only == -1)
11439 {
11440 if (verbose > 5)
11441 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11442 return;
11443 }
11444
11445 /* Move first_older_only insn before first_younger. */
11446 if (verbose > 5)
11447 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11448 INSN_UID (ready[first_older_only]),
11449 INSN_UID (ready[first_younger]));
11450 rtx_insn *first_older_only_insn = ready[first_older_only];
11451 for (i = first_older_only; i < first_younger; i++)
11452 {
11453 ready[i] = ready[i+1];
11454 }
11455
11456 ready[i] = first_older_only_insn;
11457 return;
11458 }
11459
11460 /* Implement TARGET_SCHED_REORDER. */
11461 static int
11462 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11463 int clock)
11464 {
11465 switch (arm_tune)
11466 {
11467 case TARGET_CPU_cortexa7:
11468 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11469 break;
11470 default:
11471 /* Do nothing for other cores. */
11472 break;
11473 }
11474
11475 return arm_issue_rate ();
11476 }
11477
11478 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11479 It corrects the value of COST based on the relationship between
11480 INSN and DEP, given the dependence type DEP_TYPE. It returns the new
11481 value. There is a per-core adjust_cost hook to adjust scheduler costs
11482 and the per-core hook can choose to completely override the generic
11483 adjust_cost function. Only put bits of code into arm_adjust_cost that
11484 are common across all cores. */
11485 static int
11486 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11487 unsigned int)
11488 {
11489 rtx i_pat, d_pat;
11490
11491 /* When generating Thumb-1 code, we want to place flag-setting operations
11492 close to a conditional branch which depends on them, so that we can
11493 omit the comparison. */
11494 if (TARGET_THUMB1
11495 && dep_type == 0
11496 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11497 && recog_memoized (dep) >= 0
11498 && get_attr_conds (dep) == CONDS_SET)
11499 return 0;
11500
11501 if (current_tune->sched_adjust_cost != NULL)
11502 {
11503 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11504 return cost;
11505 }
11506
11507 /* XXX Is this strictly true? */
11508 if (dep_type == REG_DEP_ANTI
11509 || dep_type == REG_DEP_OUTPUT)
11510 return 0;
11511
11512 /* Call insns don't incur a stall, even if they follow a load. */
11513 if (dep_type == 0
11514 && CALL_P (insn))
11515 return 1;
11516
11517 if ((i_pat = single_set (insn)) != NULL
11518 && MEM_P (SET_SRC (i_pat))
11519 && (d_pat = single_set (dep)) != NULL
11520 && MEM_P (SET_DEST (d_pat)))
11521 {
11522 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11523 /* This is a load after a store; there is no conflict if the load reads
11524 from a cached area. Assume that loads from the stack, and from the
11525 constant pool are cached, and that others will miss. This is a
11526 hack. */
11527
11528 if ((GET_CODE (src_mem) == SYMBOL_REF
11529 && CONSTANT_POOL_ADDRESS_P (src_mem))
11530 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11531 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11532 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11533 return 1;
11534 }
11535
11536 return cost;
11537 }
11538
11539 int
11540 arm_max_conditional_execute (void)
11541 {
11542 return max_insns_skipped;
11543 }
11544
11545 static int
11546 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11547 {
11548 if (TARGET_32BIT)
11549 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11550 else
11551 return (optimize > 0) ? 2 : 0;
11552 }
11553
11554 static int
11555 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11556 {
11557 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11558 }
11559
11560 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11561 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11562 sequences of non-executed instructions in IT blocks probably take the same
11563 amount of time as executed instructions (and the IT instruction itself takes
11564 space in icache). This function was experimentally determined to give good
11565 results on a popular embedded benchmark. */
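   /* In practice this costs a branch at 1 when optimizing 32-bit code for
      speed and falls back to arm_default_branch_cost otherwise.  */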
11566
11567 static int
11568 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11569 {
11570 return (TARGET_32BIT && speed_p) ? 1
11571 : arm_default_branch_cost (speed_p, predictable_p);
11572 }
11573
11574 static int
11575 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11576 {
11577 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11578 }
11579
11580 static bool fp_consts_inited = false;
11581
11582 static REAL_VALUE_TYPE value_fp0;
11583
11584 static void
11585 init_fp_table (void)
11586 {
11587 REAL_VALUE_TYPE r;
11588
11589 r = REAL_VALUE_ATOF ("0", DFmode);
11590 value_fp0 = r;
11591 fp_consts_inited = true;
11592 }
11593
11594 /* Return TRUE if rtx X is a valid immediate FP constant. */
11595 int
11596 arm_const_double_rtx (rtx x)
11597 {
11598 const REAL_VALUE_TYPE *r;
11599
11600 if (!fp_consts_inited)
11601 init_fp_table ();
11602
11603 r = CONST_DOUBLE_REAL_VALUE (x);
11604 if (REAL_VALUE_MINUS_ZERO (*r))
11605 return 0;
11606
11607 if (real_equal (r, &value_fp0))
11608 return 1;
11609
11610 return 0;
11611 }
11612
11613 /* VFPv3 has a fairly wide range of representable immediates, formed from
11614 "quarter-precision" floating-point values. These can be evaluated using this
11615 formula (with ^ for exponentiation):
11616
11617 (-1)^s * n * 2^-r
11618
11619 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11620 16 <= n <= 31 and 0 <= r <= 7.
11621
11622 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11623
11624 - A (most-significant) is the sign bit.
11625 - BCD are the exponent (encoded as r XOR 3).
11626 - EFGH are the mantissa (encoded as n - 16).
11627 */
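   /* For example, 1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which encodes
      as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */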
11628
11629 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11630 fconst[sd] instruction, or -1 if X isn't suitable. */
11631 static int
11632 vfp3_const_double_index (rtx x)
11633 {
11634 REAL_VALUE_TYPE r, m;
11635 int sign, exponent;
11636 unsigned HOST_WIDE_INT mantissa, mant_hi;
11637 unsigned HOST_WIDE_INT mask;
11638 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11639 bool fail;
11640
11641 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11642 return -1;
11643
11644 r = *CONST_DOUBLE_REAL_VALUE (x);
11645
11646 /* We can't represent these things, so detect them first. */
11647 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11648 return -1;
11649
11650 /* Extract sign, exponent and mantissa. */
11651 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11652 r = real_value_abs (&r);
11653 exponent = REAL_EXP (&r);
11654 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11655 highest (sign) bit, with a fixed binary point at bit point_pos.
11656 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11657 bits for the mantissa, this may fail (low bits would be lost). */
11658 real_ldexp (&m, &r, point_pos - exponent);
11659 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11660 mantissa = w.elt (0);
11661 mant_hi = w.elt (1);
11662
11663 /* If there are bits set in the low part of the mantissa, we can't
11664 represent this value. */
11665 if (mantissa != 0)
11666 return -1;
11667
11668 /* Now make it so that mantissa contains the most-significant bits, and move
11669 the point_pos to indicate that the least-significant bits have been
11670 discarded. */
11671 point_pos -= HOST_BITS_PER_WIDE_INT;
11672 mantissa = mant_hi;
11673
11674 /* We can permit four significant bits of mantissa only, plus a high bit
11675 which is always 1. */
11676 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11677 if ((mantissa & mask) != 0)
11678 return -1;
11679
11680 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11681 mantissa >>= point_pos - 5;
11682
11683 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11684 floating-point immediate zero with Neon using an integer-zero load, but
11685 that case is handled elsewhere.) */
11686 if (mantissa == 0)
11687 return -1;
11688
11689 gcc_assert (mantissa >= 16 && mantissa <= 31);
11690
11691 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11692 normalized significands are in the range [1, 2). (Our mantissa is shifted
11693 left 4 places at this point relative to normalized IEEE754 values). GCC
11694 internally uses [0.5, 1) (see real.c), so the exponent returned from
11695 REAL_EXP must be altered. */
11696 exponent = 5 - exponent;
11697
11698 if (exponent < 0 || exponent > 7)
11699 return -1;
11700
11701 /* Sign, mantissa and exponent are now in the correct form to plug into the
11702 formula described in the comment above. */
11703 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11704 }
11705
11706 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11707 int
11708 vfp3_const_double_rtx (rtx x)
11709 {
11710 if (!TARGET_VFP3)
11711 return 0;
11712
11713 return vfp3_const_double_index (x) != -1;
11714 }
11715
11716 /* Recognize immediates which can be used in various Neon instructions. Legal
11717 immediates are described by the following table (for VMVN variants, the
11718 bitwise inverse of the constant shown is recognized. In either case, VMOV
11719 is output and the correct instruction to use for a given constant is chosen
11720 by the assembler). The constant shown is replicated across all elements of
11721 the destination vector.
11722
11723 insn elems variant constant (binary)
11724 ---- ----- ------- -----------------
11725 vmov i32 0 00000000 00000000 00000000 abcdefgh
11726 vmov i32 1 00000000 00000000 abcdefgh 00000000
11727 vmov i32 2 00000000 abcdefgh 00000000 00000000
11728 vmov i32 3 abcdefgh 00000000 00000000 00000000
11729 vmov i16 4 00000000 abcdefgh
11730 vmov i16 5 abcdefgh 00000000
11731 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11732 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11733 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11734 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11735 vmvn i16 10 00000000 abcdefgh
11736 vmvn i16 11 abcdefgh 00000000
11737 vmov i32 12 00000000 00000000 abcdefgh 11111111
11738 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11739 vmov i32 14 00000000 abcdefgh 11111111 11111111
11740 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11741 vmov i8 16 abcdefgh
11742 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11743 eeeeeeee ffffffff gggggggg hhhhhhhh
11744 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11745 vmov f32 19 00000000 00000000 00000000 00000000
11746
11747 For case 18, B = !b. Representable values are exactly those accepted by
11748 vfp3_const_double_index, but are output as floating-point numbers rather
11749 than indices.
11750
11751 For case 19, we will change it to vmov.i32 when assembling.
11752
11753 Variants 0-5 (inclusive) may also be used as immediates for the second
11754 operand of VORR/VBIC instructions.
11755
11756 The INVERSE argument causes the bitwise inverse of the given operand to be
11757 recognized instead (used for recognizing legal immediates for the VAND/VORN
11758 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11759 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11760 output, rather than the real insns vbic/vorr).
11761
11762 INVERSE makes no difference to the recognition of float vectors.
11763
11764 The return value is the variant of immediate as shown in the above table, or
11765 -1 if the given value doesn't match any of the listed patterns.
11766 */
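   /* E.g. a V4SImode constant with every element equal to 0x0000ab00 matches
      variant 1 above: element width 32, modified constant 0x0000ab00.  */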
11767 static int
11768 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11769 rtx *modconst, int *elementwidth)
11770 {
11771 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11772 matches = 1; \
11773 for (i = 0; i < idx; i += (STRIDE)) \
11774 if (!(TEST)) \
11775 matches = 0; \
11776 if (matches) \
11777 { \
11778 immtype = (CLASS); \
11779 elsize = (ELSIZE); \
11780 break; \
11781 }
11782
11783 unsigned int i, elsize = 0, idx = 0, n_elts;
11784 unsigned int innersize;
11785 unsigned char bytes[16];
11786 int immtype = -1, matches;
11787 unsigned int invmask = inverse ? 0xff : 0;
11788 bool vector = GET_CODE (op) == CONST_VECTOR;
11789
11790 if (vector)
11791 n_elts = CONST_VECTOR_NUNITS (op);
11792 else
11793 {
11794 n_elts = 1;
11795 if (mode == VOIDmode)
11796 mode = DImode;
11797 }
11798
11799 innersize = GET_MODE_UNIT_SIZE (mode);
11800
11801 /* Vectors of float constants. */
11802 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11803 {
11804 rtx el0 = CONST_VECTOR_ELT (op, 0);
11805
11806 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11807 return -1;
11808
11809 /* FP16 vectors cannot be represented. */
11810 if (GET_MODE_INNER (mode) == HFmode)
11811 return -1;
11812
11813 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11814 are distinct in this context. */
11815 if (!const_vec_duplicate_p (op))
11816 return -1;
11817
11818 if (modconst)
11819 *modconst = CONST_VECTOR_ELT (op, 0);
11820
11821 if (elementwidth)
11822 *elementwidth = 0;
11823
11824 if (el0 == CONST0_RTX (GET_MODE (el0)))
11825 return 19;
11826 else
11827 return 18;
11828 }
11829
11830 /* The tricks done in the code below apply for little-endian vector layout.
11831 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11832 FIXME: Implement logic for big-endian vectors. */
11833 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11834 return -1;
11835
11836 /* Splat vector constant out into a byte vector. */
11837 for (i = 0; i < n_elts; i++)
11838 {
11839 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11840 unsigned HOST_WIDE_INT elpart;
11841
11842 gcc_assert (CONST_INT_P (el));
11843 elpart = INTVAL (el);
11844
11845 for (unsigned int byte = 0; byte < innersize; byte++)
11846 {
11847 bytes[idx++] = (elpart & 0xff) ^ invmask;
11848 elpart >>= BITS_PER_UNIT;
11849 }
11850 }
11851
11852 /* Sanity check. */
11853 gcc_assert (idx == GET_MODE_SIZE (mode));
11854
11855 do
11856 {
11857 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11858 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11859
11860 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11861 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11862
11863 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11864 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11865
11866 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11867 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11868
11869 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11870
11871 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11872
11873 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11874 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11875
11876 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11877 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11878
11879 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11880 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11881
11882 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11883 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11884
11885 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11886
11887 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11888
11889 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11890 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11891
11892 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11893 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11894
11895 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11896 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11897
11898 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11899 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11900
11901 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11902
11903 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11904 && bytes[i] == bytes[(i + 8) % idx]);
11905 }
11906 while (0);
11907
11908 if (immtype == -1)
11909 return -1;
11910
11911 if (elementwidth)
11912 *elementwidth = elsize;
11913
11914 if (modconst)
11915 {
11916 unsigned HOST_WIDE_INT imm = 0;
11917
11918 /* Un-invert bytes of recognized vector, if necessary. */
11919 if (invmask != 0)
11920 for (i = 0; i < idx; i++)
11921 bytes[i] ^= invmask;
11922
11923 if (immtype == 17)
11924 {
11925 /* FIXME: Broken on 32-bit H_W_I hosts. */
11926 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11927
11928 for (i = 0; i < 8; i++)
11929 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11930 << (i * BITS_PER_UNIT);
11931
11932 *modconst = GEN_INT (imm);
11933 }
11934 else
11935 {
11936 unsigned HOST_WIDE_INT imm = 0;
11937
11938 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11939 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11940
11941 *modconst = GEN_INT (imm);
11942 }
11943 }
11944
11945 return immtype;
11946 #undef CHECK
11947 }
11948
11949 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11950 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11951 float elements), and a modified constant (whatever should be output for a
11952 VMOV) in *MODCONST. */
11953
11954 int
11955 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11956 rtx *modconst, int *elementwidth)
11957 {
11958 rtx tmpconst;
11959 int tmpwidth;
11960 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11961
11962 if (retval == -1)
11963 return 0;
11964
11965 if (modconst)
11966 *modconst = tmpconst;
11967
11968 if (elementwidth)
11969 *elementwidth = tmpwidth;
11970
11971 return 1;
11972 }
11973
11974 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11975 the immediate is valid, write a constant suitable for using as an operand
11976 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11977 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11978
11979 int
11980 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11981 rtx *modconst, int *elementwidth)
11982 {
11983 rtx tmpconst;
11984 int tmpwidth;
11985 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11986
11987 if (retval < 0 || retval > 5)
11988 return 0;
11989
11990 if (modconst)
11991 *modconst = tmpconst;
11992
11993 if (elementwidth)
11994 *elementwidth = tmpwidth;
11995
11996 return 1;
11997 }
11998
11999 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12000 the immediate is valid, write a constant suitable for using as an operand
12001 to VSHR/VSHL to *MODCONST and the corresponding element width to
12002 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left shift,
12003 because left and right shifts have different limitations. */
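   /* For example, with 8-bit elements a VSHL immediate must lie in the range
      0-7, while a VSHR immediate must lie in the range 1-8.  */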
12004
12005 int
12006 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12007 rtx *modconst, int *elementwidth,
12008 bool isleftshift)
12009 {
12010 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12011 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12012 unsigned HOST_WIDE_INT last_elt = 0;
12013 unsigned HOST_WIDE_INT maxshift;
12014
12015 /* All elements of the vector constant must be identical; extract and check them. */
12016 for (i = 0; i < n_elts; i++)
12017 {
12018 rtx el = CONST_VECTOR_ELT (op, i);
12019 unsigned HOST_WIDE_INT elpart;
12020
12021 if (CONST_INT_P (el))
12022 elpart = INTVAL (el);
12023 else if (CONST_DOUBLE_P (el))
12024 return 0;
12025 else
12026 gcc_unreachable ();
12027
12028 if (i != 0 && elpart != last_elt)
12029 return 0;
12030
12031 last_elt = elpart;
12032 }
12033
12034 /* Shift less than element size. */
12035 maxshift = innersize * 8;
12036
12037 if (isleftshift)
12038 {
12039 /* Left shift immediate value can be from 0 to <size>-1. */
12040 if (last_elt >= maxshift)
12041 return 0;
12042 }
12043 else
12044 {
12045 /* Right shift immediate value can be from 1 to <size>. */
12046 if (last_elt == 0 || last_elt > maxshift)
12047 return 0;
12048 }
12049
12050 if (elementwidth)
12051 *elementwidth = innersize * 8;
12052
12053 if (modconst)
12054 *modconst = CONST_VECTOR_ELT (op, 0);
12055
12056 return 1;
12057 }
12058
12059 /* Return a string suitable for output of Neon immediate logic operation
12060 MNEM. */
12061
12062 char *
12063 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12064 int inverse, int quad)
12065 {
12066 int width, is_valid;
12067 static char templ[40];
12068
12069 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12070
12071 gcc_assert (is_valid != 0);
12072
12073 if (quad)
12074 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12075 else
12076 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12077
12078 return templ;
12079 }
12080
12081 /* Return a string suitable for output of Neon immediate shift operation
12082 (VSHR or VSHL) MNEM. */
12083
12084 char *
12085 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12086 machine_mode mode, int quad,
12087 bool isleftshift)
12088 {
12089 int width, is_valid;
12090 static char templ[40];
12091
12092 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12093 gcc_assert (is_valid != 0);
12094
12095 if (quad)
12096 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12097 else
12098 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12099
12100 return templ;
12101 }
12102
12103 /* Output a sequence of pairwise operations to implement a reduction.
12104 NOTE: We do "too much work" here, because pairwise operations work on two
12105 registers-worth of operands in one go. Unfortunately it does not seem possible
12106 to exploit those extra calculations to do the full operation in fewer steps.
12107 Although all vector elements of the result but the first are ignored, we
12108 actually calculate the same result in each of the elements. An alternative
12109 such as initially loading a vector with zero to use as each of the second
12110 operands would use up an additional register and take an extra instruction,
12111 for no particular gain. */
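   /* E.g. reducing {a, b, c, d} with a pairwise add takes two steps:
      {a+b, c+d, a+b, c+d} and then the total a+b+c+d in every element.  */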
12112
12113 void
12114 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12115 rtx (*reduc) (rtx, rtx, rtx))
12116 {
12117 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12118 rtx tmpsum = op1;
12119
12120 for (i = parts / 2; i >= 1; i /= 2)
12121 {
12122 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12123 emit_insn (reduc (dest, tmpsum, tmpsum));
12124 tmpsum = dest;
12125 }
12126 }
12127
12128 /* If VALS is a vector constant that can be loaded into a register
12129 using VDUP, generate instructions to do so and return an RTX to
12130 assign to the register. Otherwise return NULL_RTX. */
12131
12132 static rtx
12133 neon_vdup_constant (rtx vals)
12134 {
12135 machine_mode mode = GET_MODE (vals);
12136 machine_mode inner_mode = GET_MODE_INNER (mode);
12137 rtx x;
12138
12139 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12140 return NULL_RTX;
12141
12142 if (!const_vec_duplicate_p (vals, &x))
12143 /* The elements are not all the same. We could handle repeating
12144 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12145 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12146 vdup.i16). */
12147 return NULL_RTX;
12148
12149 /* We can load this constant by using VDUP and a constant in a
12150 single ARM register. This will be cheaper than a vector
12151 load. */
12152
12153 x = copy_to_mode_reg (inner_mode, x);
12154 return gen_rtx_VEC_DUPLICATE (mode, x);
12155 }
12156
12157 /* Generate code to load VALS, which is a PARALLEL containing only
12158 constants (for vec_init) or CONST_VECTOR, efficiently into a
12159 register. Returns an RTX to copy into the register, or NULL_RTX
12160 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12161
12162 rtx
12163 neon_make_constant (rtx vals)
12164 {
12165 machine_mode mode = GET_MODE (vals);
12166 rtx target;
12167 rtx const_vec = NULL_RTX;
12168 int n_elts = GET_MODE_NUNITS (mode);
12169 int n_const = 0;
12170 int i;
12171
12172 if (GET_CODE (vals) == CONST_VECTOR)
12173 const_vec = vals;
12174 else if (GET_CODE (vals) == PARALLEL)
12175 {
12176 /* A CONST_VECTOR must contain only CONST_INTs and
12177 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12178 Only store valid constants in a CONST_VECTOR. */
12179 for (i = 0; i < n_elts; ++i)
12180 {
12181 rtx x = XVECEXP (vals, 0, i);
12182 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12183 n_const++;
12184 }
12185 if (n_const == n_elts)
12186 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12187 }
12188 else
12189 gcc_unreachable ();
12190
12191 if (const_vec != NULL
12192 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12193 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12194 return const_vec;
12195 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12196 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12197 pipeline cycle; creating the constant takes one or two ARM
12198 pipeline cycles. */
12199 return target;
12200 else if (const_vec != NULL_RTX)
12201 /* Load from constant pool. On Cortex-A8 this takes two cycles
12202 (for either double or quad vectors). We can not take advantage
12203 of single-cycle VLD1 because we need a PC-relative addressing
12204 mode. */
12205 return const_vec;
12206 else
12207 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12208 We can not construct an initializer. */
12209 return NULL_RTX;
12210 }
12211
12212 /* Initialize vector TARGET to VALS. */
12213
12214 void
12215 neon_expand_vector_init (rtx target, rtx vals)
12216 {
12217 machine_mode mode = GET_MODE (target);
12218 machine_mode inner_mode = GET_MODE_INNER (mode);
12219 int n_elts = GET_MODE_NUNITS (mode);
12220 int n_var = 0, one_var = -1;
12221 bool all_same = true;
12222 rtx x, mem;
12223 int i;
12224
12225 for (i = 0; i < n_elts; ++i)
12226 {
12227 x = XVECEXP (vals, 0, i);
12228 if (!CONSTANT_P (x))
12229 ++n_var, one_var = i;
12230
12231 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12232 all_same = false;
12233 }
12234
12235 if (n_var == 0)
12236 {
12237 rtx constant = neon_make_constant (vals);
12238 if (constant != NULL_RTX)
12239 {
12240 emit_move_insn (target, constant);
12241 return;
12242 }
12243 }
12244
12245 /* Splat a single non-constant element if we can. */
12246 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12247 {
12248 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12249 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12250 return;
12251 }
12252
12253 /* One field is non-constant. Load constant then overwrite varying
12254 field. This is more efficient than using the stack. */
12255 if (n_var == 1)
12256 {
12257 rtx copy = copy_rtx (vals);
12258 rtx index = GEN_INT (one_var);
12259
12260 /* Load constant part of vector, substitute neighboring value for
12261 varying element. */
12262 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12263 neon_expand_vector_init (target, copy);
12264
12265 /* Insert variable. */
12266 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12267 switch (mode)
12268 {
12269 case E_V8QImode:
12270 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12271 break;
12272 case E_V16QImode:
12273 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12274 break;
12275 case E_V4HImode:
12276 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12277 break;
12278 case E_V8HImode:
12279 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12280 break;
12281 case E_V2SImode:
12282 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12283 break;
12284 case E_V4SImode:
12285 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12286 break;
12287 case E_V2SFmode:
12288 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12289 break;
12290 case E_V4SFmode:
12291 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12292 break;
12293 case E_V2DImode:
12294 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12295 break;
12296 default:
12297 gcc_unreachable ();
12298 }
12299 return;
12300 }
12301
12302 /* Construct the vector in memory one field at a time
12303 and load the whole vector. */
12304 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12305 for (i = 0; i < n_elts; i++)
12306 emit_move_insn (adjust_address_nv (mem, inner_mode,
12307 i * GET_MODE_SIZE (inner_mode)),
12308 XVECEXP (vals, 0, i));
12309 emit_move_insn (target, mem);
12310 }
12311
12312 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12313 an error mentioning DESC if it doesn't. EXP indicates the source location,
12314 which includes the inlining history for intrinsics. */
12315
12316 static void
12317 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12318 const_tree exp, const char *desc)
12319 {
12320 HOST_WIDE_INT lane;
12321
12322 gcc_assert (CONST_INT_P (operand));
12323
12324 lane = INTVAL (operand);
12325
12326 if (lane < low || lane >= high)
12327 {
12328 if (exp)
12329 error ("%K%s %wd out of range %wd - %wd",
12330 exp, desc, lane, low, high - 1);
12331 else
12332 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12333 }
12334 }
12335
12336 /* Bounds-check lanes. */
12337
12338 void
12339 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12340 const_tree exp)
12341 {
12342 bounds_check (operand, low, high, exp, "lane");
12343 }
12344
12345 /* Bounds-check constants. */
12346
12347 void
12348 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12349 {
12350 bounds_check (operand, low, high, NULL_TREE, "constant");
12351 }
12352
12353 HOST_WIDE_INT
12354 neon_element_bits (machine_mode mode)
12355 {
12356 return GET_MODE_UNIT_BITSIZE (mode);
12357 }
12358
12359 \f
12360 /* Predicates for `match_operand' and `match_operator'. */
12361
12362 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12363 WB is true if full writeback address modes are allowed and is false
12364 if limited writeback address modes (POST_INC and PRE_DEC) are
12365 allowed. */
12366
12367 int
12368 arm_coproc_mem_operand (rtx op, bool wb)
12369 {
12370 rtx ind;
12371
12372 /* Reject eliminable registers. */
12373 if (! (reload_in_progress || reload_completed || lra_in_progress)
12374 && ( reg_mentioned_p (frame_pointer_rtx, op)
12375 || reg_mentioned_p (arg_pointer_rtx, op)
12376 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12377 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12378 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12379 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12380 return FALSE;
12381
12382 /* Constants are converted into offsets from labels. */
12383 if (!MEM_P (op))
12384 return FALSE;
12385
12386 ind = XEXP (op, 0);
12387
12388 if (reload_completed
12389 && (GET_CODE (ind) == LABEL_REF
12390 || (GET_CODE (ind) == CONST
12391 && GET_CODE (XEXP (ind, 0)) == PLUS
12392 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12393 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12394 return TRUE;
12395
12396 /* Match: (mem (reg)). */
12397 if (REG_P (ind))
12398 return arm_address_register_rtx_p (ind, 0);
12399
12400 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12401 acceptable in any case (subject to verification by
12402 arm_address_register_rtx_p). We need WB to be true to accept
12403 PRE_INC and POST_DEC. */
12404 if (GET_CODE (ind) == POST_INC
12405 || GET_CODE (ind) == PRE_DEC
12406 || (wb
12407 && (GET_CODE (ind) == PRE_INC
12408 || GET_CODE (ind) == POST_DEC)))
12409 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12410
12411 if (wb
12412 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12413 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12414 && GET_CODE (XEXP (ind, 1)) == PLUS
12415 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12416 ind = XEXP (ind, 1);
12417
12418 /* Match:
12419 (plus (reg)
12420 (const)). */
12421 if (GET_CODE (ind) == PLUS
12422 && REG_P (XEXP (ind, 0))
12423 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12424 && CONST_INT_P (XEXP (ind, 1))
12425 && INTVAL (XEXP (ind, 1)) > -1024
12426 && INTVAL (XEXP (ind, 1)) < 1024
12427 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12428 return TRUE;
12429
12430 return FALSE;
12431 }
12432
12433 /* Return TRUE if OP is a memory operand which we can load or store a vector
12434 to/from. TYPE is one of the following values:
12435 0 - Vector load/store (vldr)
12436 1 - Core registers (ldm)
12437 2 - Element/structure loads (vld1)
12438 */
12439 int
12440 neon_vector_mem_operand (rtx op, int type, bool strict)
12441 {
12442 rtx ind;
12443
12444 /* Reject eliminable registers. */
12445 if (strict && ! (reload_in_progress || reload_completed)
12446 && (reg_mentioned_p (frame_pointer_rtx, op)
12447 || reg_mentioned_p (arg_pointer_rtx, op)
12448 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12449 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12450 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12451 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12452 return FALSE;
12453
12454 /* Constants are converted into offsets from labels. */
12455 if (!MEM_P (op))
12456 return FALSE;
12457
12458 ind = XEXP (op, 0);
12459
12460 if (reload_completed
12461 && (GET_CODE (ind) == LABEL_REF
12462 || (GET_CODE (ind) == CONST
12463 && GET_CODE (XEXP (ind, 0)) == PLUS
12464 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12465 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12466 return TRUE;
12467
12468 /* Match: (mem (reg)). */
12469 if (REG_P (ind))
12470 return arm_address_register_rtx_p (ind, 0);
12471
12472 /* Allow post-increment with Neon registers. */
12473 if ((type != 1 && GET_CODE (ind) == POST_INC)
12474 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12475 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12476
12477 /* Allow post-increment by register for VLDn. */
12478 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12479 && GET_CODE (XEXP (ind, 1)) == PLUS
12480 && REG_P (XEXP (XEXP (ind, 1), 1)))
12481 return true;
12482
12483 /* Match:
12484 (plus (reg)
12485 (const)). */
12486 if (type == 0
12487 && GET_CODE (ind) == PLUS
12488 && REG_P (XEXP (ind, 0))
12489 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12490 && CONST_INT_P (XEXP (ind, 1))
12491 && INTVAL (XEXP (ind, 1)) > -1024
12492 /* For quad modes, we restrict the constant offset to be slightly less
12493 than what the instruction format permits. We have no such constraint
12494 on double mode offsets. (This must match arm_legitimate_index_p.) */
12495 && (INTVAL (XEXP (ind, 1))
12496 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12497 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12498 return TRUE;
12499
12500 return FALSE;
12501 }
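/* A rough sketch of the address forms accepted above (register numbers are
   arbitrary and purely illustrative):

     type 0 (vldr):  (mem (reg rN))
                     (mem (post_inc (reg rN)))  or  (mem (pre_dec (reg rN)))
                     (mem (plus (reg rN) (const_int off)))
                       with -1024 < off < 1024 (less than 1016 for quad
                       modes) and off a multiple of 4
     type 1 (ldm):   (mem (reg rN)) only
     type 2 (vld1):  (mem (reg rN)), (mem (post_inc (reg rN))), or
                     (mem (post_modify (reg rN) (plus (reg rN) (reg rM))))

   Label-relative addresses are additionally accepted once reload has
   completed, since constants have been converted into literal-pool
   references by then.  */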
12502
12503 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12504 type. */
12505 int
12506 neon_struct_mem_operand (rtx op)
12507 {
12508 rtx ind;
12509
12510 /* Reject eliminable registers. */
12511 if (! (reload_in_progress || reload_completed)
12512 && ( reg_mentioned_p (frame_pointer_rtx, op)
12513 || reg_mentioned_p (arg_pointer_rtx, op)
12514 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12515 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12516 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12517 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12518 return FALSE;
12519
12520 /* Constants are converted into offsets from labels. */
12521 if (!MEM_P (op))
12522 return FALSE;
12523
12524 ind = XEXP (op, 0);
12525
12526 if (reload_completed
12527 && (GET_CODE (ind) == LABEL_REF
12528 || (GET_CODE (ind) == CONST
12529 && GET_CODE (XEXP (ind, 0)) == PLUS
12530 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12531 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12532 return TRUE;
12533
12534 /* Match: (mem (reg)). */
12535 if (REG_P (ind))
12536 return arm_address_register_rtx_p (ind, 0);
12537
12538 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12539 if (GET_CODE (ind) == POST_INC
12540 || GET_CODE (ind) == PRE_DEC)
12541 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12542
12543 return FALSE;
12544 }
12545
12546 /* Return true if X is a register that will be eliminated later on. */
12547 int
12548 arm_eliminable_register (rtx x)
12549 {
12550 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12551 || REGNO (x) == ARG_POINTER_REGNUM
12552 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12553 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12554 }
12555
12556 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12557 coprocessor registers. Otherwise return NO_REGS. */
12558
12559 enum reg_class
12560 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12561 {
12562 if (mode == HFmode)
12563 {
12564 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12565 return GENERAL_REGS;
12566 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12567 return NO_REGS;
12568 return GENERAL_REGS;
12569 }
12570
12571 /* The neon move patterns handle all legitimate vector and struct
12572 addresses. */
12573 if (TARGET_NEON
12574 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12575 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12576 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12577 || VALID_NEON_STRUCT_MODE (mode)))
12578 return NO_REGS;
12579
12580 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12581 return NO_REGS;
12582
12583 return GENERAL_REGS;
12584 }
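/* Rough illustration (modes and operands invented for the example): an
   HFmode reload returns GENERAL_REGS whenever neither the NEON FP16 nor
   the VFP FP16 instruction extensions are available, since the value then
   has to travel through a core register; a DFmode access such as
   (mem:DF (reg:SI r4)) is already a valid coprocessor memory operand, so
   NO_REGS is returned and no scratch register is needed.  */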
12585
12586 /* Values which must be returned in the most-significant end of the return
12587 register. */
12588
12589 static bool
12590 arm_return_in_msb (const_tree valtype)
12591 {
12592 return (TARGET_AAPCS_BASED
12593 && BYTES_BIG_ENDIAN
12594 && (AGGREGATE_TYPE_P (valtype)
12595 || TREE_CODE (valtype) == COMPLEX_TYPE
12596 || FIXED_POINT_TYPE_P (valtype)));
12597 }
12598
12599 /* Return TRUE if X references a SYMBOL_REF. */
12600 int
12601 symbol_mentioned_p (rtx x)
12602 {
12603 const char * fmt;
12604 int i;
12605
12606 if (GET_CODE (x) == SYMBOL_REF)
12607 return 1;
12608
12609 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12610 are constant offsets, not symbols. */
12611 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12612 return 0;
12613
12614 fmt = GET_RTX_FORMAT (GET_CODE (x));
12615
12616 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12617 {
12618 if (fmt[i] == 'E')
12619 {
12620 int j;
12621
12622 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12623 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12624 return 1;
12625 }
12626 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12627 return 1;
12628 }
12629
12630 return 0;
12631 }
12632
12633 /* Return TRUE if X references a LABEL_REF. */
12634 int
12635 label_mentioned_p (rtx x)
12636 {
12637 const char * fmt;
12638 int i;
12639
12640 if (GET_CODE (x) == LABEL_REF)
12641 return 1;
12642
12643 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12644 instruction, but they are constant offsets, not symbols. */
12645 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12646 return 0;
12647
12648 fmt = GET_RTX_FORMAT (GET_CODE (x));
12649 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12650 {
12651 if (fmt[i] == 'E')
12652 {
12653 int j;
12654
12655 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12656 if (label_mentioned_p (XVECEXP (x, i, j)))
12657 return 1;
12658 }
12659 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12660 return 1;
12661 }
12662
12663 return 0;
12664 }
12665
12666 int
12667 tls_mentioned_p (rtx x)
12668 {
12669 switch (GET_CODE (x))
12670 {
12671 case CONST:
12672 return tls_mentioned_p (XEXP (x, 0));
12673
12674 case UNSPEC:
12675 if (XINT (x, 1) == UNSPEC_TLS)
12676 return 1;
12677
12678 /* Fall through. */
12679 default:
12680 return 0;
12681 }
12682 }
12683
12684 /* Must not copy any rtx that uses a pc-relative address.
12685 Also, disallow copying of load-exclusive instructions that
12686 may appear after splitting of compare-and-swap-style operations
12687 so as to prevent those loops from being transformed away from their
12688 canonical forms (see PR 69904). */
12689
12690 static bool
12691 arm_cannot_copy_insn_p (rtx_insn *insn)
12692 {
12693 /* The tls call insn cannot be copied, as it is paired with a data
12694 word. */
12695 if (recog_memoized (insn) == CODE_FOR_tlscall)
12696 return true;
12697
12698 subrtx_iterator::array_type array;
12699 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12700 {
12701 const_rtx x = *iter;
12702 if (GET_CODE (x) == UNSPEC
12703 && (XINT (x, 1) == UNSPEC_PIC_BASE
12704 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12705 return true;
12706 }
12707
12708 rtx set = single_set (insn);
12709 if (set)
12710 {
12711 rtx src = SET_SRC (set);
12712 if (GET_CODE (src) == ZERO_EXTEND)
12713 src = XEXP (src, 0);
12714
12715 /* Catch the load-exclusive and load-acquire operations. */
12716 if (GET_CODE (src) == UNSPEC_VOLATILE
12717 && (XINT (src, 1) == VUNSPEC_LL
12718 || XINT (src, 1) == VUNSPEC_LAX))
12719 return true;
12720 }
12721 return false;
12722 }
12723
12724 enum rtx_code
12725 minmax_code (rtx x)
12726 {
12727 enum rtx_code code = GET_CODE (x);
12728
12729 switch (code)
12730 {
12731 case SMAX:
12732 return GE;
12733 case SMIN:
12734 return LE;
12735 case UMIN:
12736 return LEU;
12737 case UMAX:
12738 return GEU;
12739 default:
12740 gcc_unreachable ();
12741 }
12742 }
12743
12744 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
12745
12746 bool
12747 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12748 int *mask, bool *signed_sat)
12749 {
12750 /* The high bound must be a power of two minus one. */
12751 int log = exact_log2 (INTVAL (hi_bound) + 1);
12752 if (log == -1)
12753 return false;
12754
12755 /* The low bound is either zero (for usat) or one less than the
12756 negation of the high bound (for ssat). */
12757 if (INTVAL (lo_bound) == 0)
12758 {
12759 if (mask)
12760 *mask = log;
12761 if (signed_sat)
12762 *signed_sat = false;
12763
12764 return true;
12765 }
12766
12767 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12768 {
12769 if (mask)
12770 *mask = log + 1;
12771 if (signed_sat)
12772 *signed_sat = true;
12773
12774 return true;
12775 }
12776
12777 return false;
12778 }
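/* Worked examples (bound values chosen purely for illustration):

     lo_bound = 0,    hi_bound = 255  -> *mask = 8, *signed_sat = false,
                                         i.e. the range of "usat ..., #8, ..."
     lo_bound = -128, hi_bound = 127  -> *mask = 8, *signed_sat = true,
                                         i.e. the range of "ssat ..., #8, ..."

   A pair such as lo_bound = -100, hi_bound = 100 fails the power-of-two
   test and is rejected.  */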
12779
12780 /* Return 1 if memory locations are adjacent. */
12781 int
12782 adjacent_mem_locations (rtx a, rtx b)
12783 {
12784 /* We don't guarantee to preserve the order of these memory refs. */
12785 if (volatile_refs_p (a) || volatile_refs_p (b))
12786 return 0;
12787
12788 if ((REG_P (XEXP (a, 0))
12789 || (GET_CODE (XEXP (a, 0)) == PLUS
12790 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12791 && (REG_P (XEXP (b, 0))
12792 || (GET_CODE (XEXP (b, 0)) == PLUS
12793 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12794 {
12795 HOST_WIDE_INT val0 = 0, val1 = 0;
12796 rtx reg0, reg1;
12797 int val_diff;
12798
12799 if (GET_CODE (XEXP (a, 0)) == PLUS)
12800 {
12801 reg0 = XEXP (XEXP (a, 0), 0);
12802 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12803 }
12804 else
12805 reg0 = XEXP (a, 0);
12806
12807 if (GET_CODE (XEXP (b, 0)) == PLUS)
12808 {
12809 reg1 = XEXP (XEXP (b, 0), 0);
12810 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12811 }
12812 else
12813 reg1 = XEXP (b, 0);
12814
12815 /* Don't accept any offset that will require multiple
12816 instructions to handle, since this would cause the
12817 arith_adjacentmem pattern to output an overlong sequence. */
12818 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12819 return 0;
12820
12821 /* Don't allow an eliminable register: register elimination can make
12822 the offset too large. */
12823 if (arm_eliminable_register (reg0))
12824 return 0;
12825
12826 val_diff = val1 - val0;
12827
12828 if (arm_ld_sched)
12829 {
12830 /* If the target has load delay slots, then there's no benefit
12831 to using an ldm instruction unless the offset is zero and
12832 we are optimizing for size. */
12833 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12834 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12835 && (val_diff == 4 || val_diff == -4));
12836 }
12837
12838 return ((REGNO (reg0) == REGNO (reg1))
12839 && (val_diff == 4 || val_diff == -4));
12840 }
12841
12842 return 0;
12843 }
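/* For example (operands invented for illustration):
   (mem (plus (reg r3) (const_int 8))) and
   (mem (plus (reg r3) (const_int 12))) are adjacent, provided r3 is not an
   eliminable register; the same two references based on different
   registers, or placed 8 bytes apart, are not.  On cores with load delay
   slots (arm_ld_sched) adjacency is additionally only reported when
   optimizing for size and one of the offsets is 0 or 4.  */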
12844
12845 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12846 for load operations, false for store operations. CONSECUTIVE is true
12847 if the register numbers in the operation must be consecutive in the register
12848 bank. RETURN_PC is true if the value is to be loaded into the PC.
12849 The pattern we are trying to match for load is:
12850 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12851 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12852 :
12853 :
12854 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12855 ]
12856 where
12857 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12858 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12859 3. If consecutive is TRUE, then for kth register being loaded,
12860 REGNO (R_dk) = REGNO (R_d0) + k.
12861 The pattern for store is similar. */
12862 bool
12863 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12864 bool consecutive, bool return_pc)
12865 {
12866 HOST_WIDE_INT count = XVECLEN (op, 0);
12867 rtx reg, mem, addr;
12868 unsigned regno;
12869 unsigned first_regno;
12870 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12871 rtx elt;
12872 bool addr_reg_in_reglist = false;
12873 bool update = false;
12874 int reg_increment;
12875 int offset_adj;
12876 int regs_per_val;
12877
12878 /* If not in SImode, then registers must be consecutive
12879 (e.g., VLDM instructions for DFmode). */
12880 gcc_assert ((mode == SImode) || consecutive);
12881 /* Setting return_pc for stores is illegal. */
12882 gcc_assert (!return_pc || load);
12883
12884 /* Set up the increments and the regs per val based on the mode. */
12885 reg_increment = GET_MODE_SIZE (mode);
12886 regs_per_val = reg_increment / 4;
12887 offset_adj = return_pc ? 1 : 0;
12888
12889 if (count <= 1
12890 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12891 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12892 return false;
12893
12894 /* Check if this is a write-back. */
12895 elt = XVECEXP (op, 0, offset_adj);
12896 if (GET_CODE (SET_SRC (elt)) == PLUS)
12897 {
12898 i++;
12899 base = 1;
12900 update = true;
12901
12902 /* The offset adjustment must be the number of registers being
12903 popped times the size of a single register. */
12904 if (!REG_P (SET_DEST (elt))
12905 || !REG_P (XEXP (SET_SRC (elt), 0))
12906 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12907 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12908 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12909 ((count - 1 - offset_adj) * reg_increment))
12910 return false;
12911 }
12912
12913 i = i + offset_adj;
12914 base = base + offset_adj;
12915 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12916 success depends on the type: VLDM can do just one reg,
12917 LDM must do at least two. */
12918 if ((count <= i) && (mode == SImode))
12919 return false;
12920
12921 elt = XVECEXP (op, 0, i - 1);
12922 if (GET_CODE (elt) != SET)
12923 return false;
12924
12925 if (load)
12926 {
12927 reg = SET_DEST (elt);
12928 mem = SET_SRC (elt);
12929 }
12930 else
12931 {
12932 reg = SET_SRC (elt);
12933 mem = SET_DEST (elt);
12934 }
12935
12936 if (!REG_P (reg) || !MEM_P (mem))
12937 return false;
12938
12939 regno = REGNO (reg);
12940 first_regno = regno;
12941 addr = XEXP (mem, 0);
12942 if (GET_CODE (addr) == PLUS)
12943 {
12944 if (!CONST_INT_P (XEXP (addr, 1)))
12945 return false;
12946
12947 offset = INTVAL (XEXP (addr, 1));
12948 addr = XEXP (addr, 0);
12949 }
12950
12951 if (!REG_P (addr))
12952 return false;
12953
12954 /* Don't allow SP to be loaded unless it is also the base register. It
12955 guarantees that SP is reset correctly when an LDM instruction
12956 is interrupted. Otherwise, we might end up with a corrupt stack. */
12957 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12958 return false;
12959
12960 for (; i < count; i++)
12961 {
12962 elt = XVECEXP (op, 0, i);
12963 if (GET_CODE (elt) != SET)
12964 return false;
12965
12966 if (load)
12967 {
12968 reg = SET_DEST (elt);
12969 mem = SET_SRC (elt);
12970 }
12971 else
12972 {
12973 reg = SET_SRC (elt);
12974 mem = SET_DEST (elt);
12975 }
12976
12977 if (!REG_P (reg)
12978 || GET_MODE (reg) != mode
12979 || REGNO (reg) <= regno
12980 || (consecutive
12981 && (REGNO (reg) !=
12982 (unsigned int) (first_regno + regs_per_val * (i - base))))
12983 /* Don't allow SP to be loaded unless it is also the base register. It
12984 guarantees that SP is reset correctly when an LDM instruction
12985 is interrupted. Otherwise, we might end up with a corrupt stack. */
12986 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12987 || !MEM_P (mem)
12988 || GET_MODE (mem) != mode
12989 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12990 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12991 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12992 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12993 offset + (i - base) * reg_increment))
12994 && (!REG_P (XEXP (mem, 0))
12995 || offset + (i - base) * reg_increment != 0)))
12996 return false;
12997
12998 regno = REGNO (reg);
12999 if (regno == REGNO (addr))
13000 addr_reg_in_reglist = true;
13001 }
13002
13003 if (load)
13004 {
13005 if (update && addr_reg_in_reglist)
13006 return false;
13007
13008 /* For Thumb-1, the address register is always modified - either by write-back
13009 or by an explicit load. If the pattern does not describe an update,
13010 then the address register must be in the list of loaded registers. */
13011 if (TARGET_THUMB1)
13012 return update || addr_reg_in_reglist;
13013 }
13014
13015 return true;
13016 }
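/* As a concrete sketch (hypothetical registers), the following PARALLEL is
   accepted as a load multiple with write-back, corresponding roughly to
   "ldmia r0!, {r4, r5}":

     (parallel
       [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
        (set (reg:SI r4) (mem:SI (reg:SI r0)))
        (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   The write-back increment must equal 4 * <number of data registers>, the
   data registers must be strictly ascending, and each memory offset must
   advance by the register size.  */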
13017
13018 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13019 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13020 instruction. ADD_OFFSET is nonzero if the base address register needs
13021 to be modified with an add instruction before we can use it. */
13022
13023 static bool
13024 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13025 int nops, HOST_WIDE_INT add_offset)
13026 {
13027 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13028 if the offset isn't small enough. The reason 2 ldrs are faster
13029 is because these ARMs are able to do more than one cache access
13030 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13031 whilst the ARM8 has a double bandwidth cache. This means that
13032 these cores can do both an instruction fetch and a data fetch in
13033 a single cycle, so the trick of calculating the address into a
13034 scratch register (one of the result regs) and then doing a load
13035 multiple actually becomes slower (and no smaller in code size).
13036 That is the transformation
13037
13038 ldr rd1, [rbase + offset]
13039 ldr rd2, [rbase + offset + 4]
13040
13041 to
13042
13043 add rd1, rbase, offset
13044 ldmia rd1, {rd1, rd2}
13045
13046 produces worse code -- '3 cycles + any stalls on rd2' instead of
13047 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13048 access per cycle, the first sequence could never complete in less
13049 than 6 cycles, whereas the ldm sequence would only take 5 and
13050 would make better use of sequential accesses if not hitting the
13051 cache.
13052
13053 We cheat here and test 'arm_ld_sched' which we currently know to
13054 only be true for the ARM8, ARM9 and StrongARM. If this ever
13055 changes, then the test below needs to be reworked. */
13056 if (nops == 2 && arm_ld_sched && add_offset != 0)
13057 return false;
13058
13059 /* XScale has load-store double instructions, but they have stricter
13060 alignment requirements than load-store multiple, so we cannot
13061 use them.
13062
13063 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13064 the pipeline until completion.
13065
13066 NREGS CYCLES
13067 1 3
13068 2 4
13069 3 5
13070 4 6
13071
13072 An ldr instruction takes 1-3 cycles, but does not block the
13073 pipeline.
13074
13075 NREGS CYCLES
13076 1 1-3
13077 2 2-6
13078 3 3-9
13079 4 4-12
13080
13081 Best case ldr will always win. However, the more ldr instructions
13082 we issue, the less likely we are to be able to schedule them well.
13083 Using ldr instructions also increases code size.
13084
13085 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13086 for counts of 3 or 4 regs. */
13087 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13088 return false;
13089 return true;
13090 }
13091
13092 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13093 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13094 an array ORDER which describes the sequence in which to access the
13095 offsets so that they are visited in ascending order. In this sequence, each
13096 offset must be larger by exactly 4 than the previous one. ORDER[0]
13097 must have been filled in with the lowest offset by the caller.
13098 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13099 we use to verify that ORDER produces an ascending order of registers.
13100 Return true if it was possible to construct such an order, false if
13101 not. */
13102
13103 static bool
13104 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13105 int *unsorted_regs)
13106 {
13107 int i;
13108 for (i = 1; i < nops; i++)
13109 {
13110 int j;
13111
13112 order[i] = order[i - 1];
13113 for (j = 0; j < nops; j++)
13114 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13115 {
13116 /* We must find exactly one offset that is higher than the
13117 previous one by 4. */
13118 if (order[i] != order[i - 1])
13119 return false;
13120 order[i] = j;
13121 }
13122 if (order[i] == order[i - 1])
13123 return false;
13124 /* The register numbers must be ascending. */
13125 if (unsorted_regs != NULL
13126 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13127 return false;
13128 }
13129 return true;
13130 }
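/* A small worked example (offsets invented for illustration): with
   UNSORTED_OFFSETS = { 8, 0, 4, 12 } and ORDER[0] pre-set to 1 (the index
   of the lowest offset), the loop fills ORDER with { 1, 2, 0, 3 }, i.e.
   offsets 0, 4, 8, 12.  If any step fails to find exactly one offset that
   is larger by 4 than the previous one - say the offsets were
   { 8, 0, 4, 16 } - the function returns false.  */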
13131
13132 /* Used to determine in a peephole whether a sequence of load
13133 instructions can be changed into a load-multiple instruction.
13134 NOPS is the number of separate load instructions we are examining. The
13135 first NOPS entries in OPERANDS are the destination registers, the
13136 next NOPS entries are memory operands. If this function is
13137 successful, *BASE is set to the common base register of the memory
13138 accesses; *LOAD_OFFSET is set to the first memory location's offset
13139 from that base register.
13140 REGS is an array filled in with the destination register numbers.
13141 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13142 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13143 the sequence of registers in REGS matches the loads from ascending memory
13144 locations, and the function verifies that the register numbers are
13145 themselves ascending. If CHECK_REGS is false, the register numbers
13146 are stored in the order they are found in the operands. */
13147 static int
13148 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13149 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13150 {
13151 int unsorted_regs[MAX_LDM_STM_OPS];
13152 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13153 int order[MAX_LDM_STM_OPS];
13154 rtx base_reg_rtx = NULL;
13155 int base_reg = -1;
13156 int i, ldm_case;
13157
13158 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13159 easily extended if required. */
13160 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13161
13162 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13163
13164 /* Loop over the operands and check that the memory references are
13165 suitable (i.e. immediate offsets from the same base register). At
13166 the same time, extract the target register, and the memory
13167 offsets. */
13168 for (i = 0; i < nops; i++)
13169 {
13170 rtx reg;
13171 rtx offset;
13172
13173 /* Convert a subreg of a mem into the mem itself. */
13174 if (GET_CODE (operands[nops + i]) == SUBREG)
13175 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13176
13177 gcc_assert (MEM_P (operands[nops + i]));
13178
13179 /* Don't reorder volatile memory references; it doesn't seem worth
13180 looking for the case where the order is ok anyway. */
13181 if (MEM_VOLATILE_P (operands[nops + i]))
13182 return 0;
13183
13184 offset = const0_rtx;
13185
13186 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13187 || (GET_CODE (reg) == SUBREG
13188 && REG_P (reg = SUBREG_REG (reg))))
13189 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13190 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13191 || (GET_CODE (reg) == SUBREG
13192 && REG_P (reg = SUBREG_REG (reg))))
13193 && (CONST_INT_P (offset
13194 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13195 {
13196 if (i == 0)
13197 {
13198 base_reg = REGNO (reg);
13199 base_reg_rtx = reg;
13200 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13201 return 0;
13202 }
13203 else if (base_reg != (int) REGNO (reg))
13204 /* Not addressed from the same base register. */
13205 return 0;
13206
13207 unsorted_regs[i] = (REG_P (operands[i])
13208 ? REGNO (operands[i])
13209 : REGNO (SUBREG_REG (operands[i])));
13210
13211 /* If it isn't an integer register, or if it overwrites the
13212 base register but isn't the last insn in the list, then
13213 we can't do this. */
13214 if (unsorted_regs[i] < 0
13215 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13216 || unsorted_regs[i] > 14
13217 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13218 return 0;
13219
13220 /* Don't allow SP to be loaded unless it is also the base
13221 register. It guarantees that SP is reset correctly when
13222 an LDM instruction is interrupted. Otherwise, we might
13223 end up with a corrupt stack. */
13224 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13225 return 0;
13226
13227 unsorted_offsets[i] = INTVAL (offset);
13228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13229 order[0] = i;
13230 }
13231 else
13232 /* Not a suitable memory address. */
13233 return 0;
13234 }
13235
13236 /* All the useful information has now been extracted from the
13237 operands into unsorted_regs and unsorted_offsets; additionally,
13238 order[0] has been set to the lowest offset in the list. Sort
13239 the offsets into order, verifying that they are adjacent, and
13240 check that the register numbers are ascending. */
13241 if (!compute_offset_order (nops, unsorted_offsets, order,
13242 check_regs ? unsorted_regs : NULL))
13243 return 0;
13244
13245 if (saved_order)
13246 memcpy (saved_order, order, sizeof order);
13247
13248 if (base)
13249 {
13250 *base = base_reg;
13251
13252 for (i = 0; i < nops; i++)
13253 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13254
13255 *load_offset = unsorted_offsets[order[0]];
13256 }
13257
13258 if (TARGET_THUMB1
13259 && !peep2_reg_dead_p (nops, base_reg_rtx))
13260 return 0;
13261
13262 if (unsorted_offsets[order[0]] == 0)
13263 ldm_case = 1; /* ldmia */
13264 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13265 ldm_case = 2; /* ldmib */
13266 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13267 ldm_case = 3; /* ldmda */
13268 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13269 ldm_case = 4; /* ldmdb */
13270 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13271 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13272 ldm_case = 5;
13273 else
13274 return 0;
13275
13276 if (!multiple_operation_profitable_p (false, nops,
13277 ldm_case == 5
13278 ? unsorted_offsets[order[0]] : 0))
13279 return 0;
13280
13281 return ldm_case;
13282 }
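/* Example (hypothetical peephole operands): for the two loads
     ldr r4, [r0, #4]
     ldr r5, [r0, #8]
   the function fills REGS = { 4, 5 }, sets *BASE = 0 (r0) and
   *LOAD_OFFSET = 4, and returns 2 (the ldmib case) on ARM targets where
   the combination is considered profitable.  Swapping the destination
   registers would fail the ascending-register check when CHECK_REGS is
   true, and a pair of loads based on different registers returns 0.  */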
13283
13284 /* Used to determine in a peephole whether a sequence of store instructions can
13285 be changed into a store-multiple instruction.
13286 NOPS is the number of separate store instructions we are examining.
13287 NOPS_TOTAL is the total number of instructions recognized by the peephole
13288 pattern.
13289 The first NOPS entries in OPERANDS are the source registers, the next
13290 NOPS entries are memory operands. If this function is successful, *BASE is
13291 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13292 to the first memory location's offset from that base register. REGS is an
13293 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13294 likewise filled with the corresponding rtx's.
13295 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13296 numbers to an ascending order of stores.
13297 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13298 from ascending memory locations, and the function verifies that the register
13299 numbers are themselves ascending. If CHECK_REGS is false, the register
13300 numbers are stored in the order they are found in the operands. */
13301 static int
13302 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13303 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13304 HOST_WIDE_INT *load_offset, bool check_regs)
13305 {
13306 int unsorted_regs[MAX_LDM_STM_OPS];
13307 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13308 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13309 int order[MAX_LDM_STM_OPS];
13310 int base_reg = -1;
13311 rtx base_reg_rtx = NULL;
13312 int i, stm_case;
13313
13314 /* Write back of base register is currently only supported for Thumb 1. */
13315 int base_writeback = TARGET_THUMB1;
13316
13317 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13318 easily extended if required. */
13319 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13320
13321 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13322
13323 /* Loop over the operands and check that the memory references are
13324 suitable (i.e. immediate offsets from the same base register). At
13325 the same time, extract the target register, and the memory
13326 offsets. */
13327 for (i = 0; i < nops; i++)
13328 {
13329 rtx reg;
13330 rtx offset;
13331
13332 /* Convert a subreg of a mem into the mem itself. */
13333 if (GET_CODE (operands[nops + i]) == SUBREG)
13334 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13335
13336 gcc_assert (MEM_P (operands[nops + i]));
13337
13338 /* Don't reorder volatile memory references; it doesn't seem worth
13339 looking for the case where the order is ok anyway. */
13340 if (MEM_VOLATILE_P (operands[nops + i]))
13341 return 0;
13342
13343 offset = const0_rtx;
13344
13345 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13346 || (GET_CODE (reg) == SUBREG
13347 && REG_P (reg = SUBREG_REG (reg))))
13348 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13349 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13350 || (GET_CODE (reg) == SUBREG
13351 && REG_P (reg = SUBREG_REG (reg))))
13352 && (CONST_INT_P (offset
13353 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13354 {
13355 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13356 ? operands[i] : SUBREG_REG (operands[i]));
13357 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13358
13359 if (i == 0)
13360 {
13361 base_reg = REGNO (reg);
13362 base_reg_rtx = reg;
13363 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13364 return 0;
13365 }
13366 else if (base_reg != (int) REGNO (reg))
13367 /* Not addressed from the same base register. */
13368 return 0;
13369
13370 /* If it isn't an integer register, then we can't do this. */
13371 if (unsorted_regs[i] < 0
13372 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13373 /* The effects are unpredictable if the base register is
13374 both updated and stored. */
13375 || (base_writeback && unsorted_regs[i] == base_reg)
13376 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13377 || unsorted_regs[i] > 14)
13378 return 0;
13379
13380 unsorted_offsets[i] = INTVAL (offset);
13381 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13382 order[0] = i;
13383 }
13384 else
13385 /* Not a suitable memory address. */
13386 return 0;
13387 }
13388
13389 /* All the useful information has now been extracted from the
13390 operands into unsorted_regs and unsorted_offsets; additionally,
13391 order[0] has been set to the lowest offset in the list. Sort
13392 the offsets into order, verifying that they are adjacent, and
13393 check that the register numbers are ascending. */
13394 if (!compute_offset_order (nops, unsorted_offsets, order,
13395 check_regs ? unsorted_regs : NULL))
13396 return 0;
13397
13398 if (saved_order)
13399 memcpy (saved_order, order, sizeof order);
13400
13401 if (base)
13402 {
13403 *base = base_reg;
13404
13405 for (i = 0; i < nops; i++)
13406 {
13407 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13408 if (reg_rtxs)
13409 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13410 }
13411
13412 *load_offset = unsorted_offsets[order[0]];
13413 }
13414
13415 if (TARGET_THUMB1
13416 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13417 return 0;
13418
13419 if (unsorted_offsets[order[0]] == 0)
13420 stm_case = 1; /* stmia */
13421 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13422 stm_case = 2; /* stmib */
13423 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13424 stm_case = 3; /* stmda */
13425 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13426 stm_case = 4; /* stmdb */
13427 else
13428 return 0;
13429
13430 if (!multiple_operation_profitable_p (false, nops, 0))
13431 return 0;
13432
13433 return stm_case;
13434 }
13435 \f
13436 /* Routines for use in generating RTL. */
13437
13438 /* Generate a load-multiple instruction. COUNT is the number of loads in
13439 the instruction; REGS and MEMS are arrays containing the operands.
13440 BASEREG is the base register to be used in addressing the memory operands.
13441 WBACK_OFFSET is nonzero if the instruction should update the base
13442 register. */
13443
13444 static rtx
13445 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13446 HOST_WIDE_INT wback_offset)
13447 {
13448 int i = 0, j;
13449 rtx result;
13450
13451 if (!multiple_operation_profitable_p (false, count, 0))
13452 {
13453 rtx seq;
13454
13455 start_sequence ();
13456
13457 for (i = 0; i < count; i++)
13458 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13459
13460 if (wback_offset != 0)
13461 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13462
13463 seq = get_insns ();
13464 end_sequence ();
13465
13466 return seq;
13467 }
13468
13469 result = gen_rtx_PARALLEL (VOIDmode,
13470 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13471 if (wback_offset != 0)
13472 {
13473 XVECEXP (result, 0, 0)
13474 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13475 i = 1;
13476 count++;
13477 }
13478
13479 for (j = 0; i < count; i++, j++)
13480 XVECEXP (result, 0, i)
13481 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13482
13483 return result;
13484 }
13485
13486 /* Generate a store-multiple instruction. COUNT is the number of stores in
13487 the instruction; REGS and MEMS are arrays containing the operands.
13488 BASEREG is the base register to be used in addressing the memory operands.
13489 WBACK_OFFSET is nonzero if the instruction should update the base
13490 register. */
13491
13492 static rtx
13493 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13494 HOST_WIDE_INT wback_offset)
13495 {
13496 int i = 0, j;
13497 rtx result;
13498
13499 if (GET_CODE (basereg) == PLUS)
13500 basereg = XEXP (basereg, 0);
13501
13502 if (!multiple_operation_profitable_p (false, count, 0))
13503 {
13504 rtx seq;
13505
13506 start_sequence ();
13507
13508 for (i = 0; i < count; i++)
13509 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13510
13511 if (wback_offset != 0)
13512 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13513
13514 seq = get_insns ();
13515 end_sequence ();
13516
13517 return seq;
13518 }
13519
13520 result = gen_rtx_PARALLEL (VOIDmode,
13521 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13522 if (wback_offset != 0)
13523 {
13524 XVECEXP (result, 0, 0)
13525 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13526 i = 1;
13527 count++;
13528 }
13529
13530 for (j = 0; i < count; i++, j++)
13531 XVECEXP (result, 0, i)
13532 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13533
13534 return result;
13535 }
13536
13537 /* Generate either a load-multiple or a store-multiple instruction. This
13538 function can be used in situations where we can start with a single MEM
13539 rtx and adjust its address upwards.
13540 COUNT is the number of operations in the instruction, not counting a
13541 possible update of the base register. REGS is an array containing the
13542 register operands.
13543 BASEREG is the base register to be used in addressing the memory operands,
13544 which are constructed from BASEMEM.
13545 WRITE_BACK specifies whether the generated instruction should include an
13546 update of the base register.
13547 OFFSETP is used to pass an offset to and from this function; this offset
13548 is not used when constructing the address (instead BASEMEM should have an
13549 appropriate offset in its address), it is used only for setting
13550 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13551
13552 static rtx
13553 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13554 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13555 {
13556 rtx mems[MAX_LDM_STM_OPS];
13557 HOST_WIDE_INT offset = *offsetp;
13558 int i;
13559
13560 gcc_assert (count <= MAX_LDM_STM_OPS);
13561
13562 if (GET_CODE (basereg) == PLUS)
13563 basereg = XEXP (basereg, 0);
13564
13565 for (i = 0; i < count; i++)
13566 {
13567 rtx addr = plus_constant (Pmode, basereg, i * 4);
13568 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13569 offset += 4;
13570 }
13571
13572 if (write_back)
13573 *offsetp = offset;
13574
13575 if (is_load)
13576 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13577 write_back ? 4 * count : 0);
13578 else
13579 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13580 write_back ? 4 * count : 0);
13581 }
13582
13583 rtx
13584 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13585 rtx basemem, HOST_WIDE_INT *offsetp)
13586 {
13587 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13588 offsetp);
13589 }
13590
13591 rtx
13592 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13593 rtx basemem, HOST_WIDE_INT *offsetp)
13594 {
13595 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13596 offsetp);
13597 }
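/* Usage sketch (register numbers and variable names are illustrative only):

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, base, TRUE, basemem, &off));

   builds four SImode loads from consecutive words of BASEMEM plus a
   write-back of 16 on the base register (roughly "ldmia rB!, {r4-r7}"),
   and leaves OFF at 16 so the caller can continue with any trailing bytes.
   This mirrors the calls made from arm_gen_movmemqi below.  */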
13598
13599 /* Called from a peephole2 expander to turn a sequence of loads into an
13600 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13601 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13602 is true if we can reorder the registers because their values are subsequently
13603 used commutatively.
13604 Returns true iff we could generate a new instruction. */
13605
13606 bool
13607 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13608 {
13609 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13610 rtx mems[MAX_LDM_STM_OPS];
13611 int i, j, base_reg;
13612 rtx base_reg_rtx;
13613 HOST_WIDE_INT offset;
13614 int write_back = FALSE;
13615 int ldm_case;
13616 rtx addr;
13617
13618 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13619 &base_reg, &offset, !sort_regs);
13620
13621 if (ldm_case == 0)
13622 return false;
13623
13624 if (sort_regs)
13625 for (i = 0; i < nops - 1; i++)
13626 for (j = i + 1; j < nops; j++)
13627 if (regs[i] > regs[j])
13628 {
13629 int t = regs[i];
13630 regs[i] = regs[j];
13631 regs[j] = t;
13632 }
13633 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13634
13635 if (TARGET_THUMB1)
13636 {
13637 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13638 gcc_assert (ldm_case == 1 || ldm_case == 5);
13639 write_back = TRUE;
13640 }
13641
13642 if (ldm_case == 5)
13643 {
13644 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13645 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13646 offset = 0;
13647 if (!TARGET_THUMB1)
13648 base_reg_rtx = newbase;
13649 }
13650
13651 for (i = 0; i < nops; i++)
13652 {
13653 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13654 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13655 SImode, addr, 0);
13656 }
13657 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13658 write_back ? offset + i * 4 : 0));
13659 return true;
13660 }
13661
13662 /* Called from a peephole2 expander to turn a sequence of stores into an
13663 STM instruction. OPERANDS are the operands found by the peephole matcher;
13664 NOPS indicates how many separate stores we are trying to combine.
13665 Returns true iff we could generate a new instruction. */
13666
13667 bool
13668 gen_stm_seq (rtx *operands, int nops)
13669 {
13670 int i;
13671 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13672 rtx mems[MAX_LDM_STM_OPS];
13673 int base_reg;
13674 rtx base_reg_rtx;
13675 HOST_WIDE_INT offset;
13676 int write_back = FALSE;
13677 int stm_case;
13678 rtx addr;
13679 bool base_reg_dies;
13680
13681 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13682 mem_order, &base_reg, &offset, true);
13683
13684 if (stm_case == 0)
13685 return false;
13686
13687 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13688
13689 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13690 if (TARGET_THUMB1)
13691 {
13692 gcc_assert (base_reg_dies);
13693 write_back = TRUE;
13694 }
13695
13696 if (stm_case == 5)
13697 {
13698 gcc_assert (base_reg_dies);
13699 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13700 offset = 0;
13701 }
13702
13703 addr = plus_constant (Pmode, base_reg_rtx, offset);
13704
13705 for (i = 0; i < nops; i++)
13706 {
13707 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13708 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13709 SImode, addr, 0);
13710 }
13711 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13712 write_back ? offset + i * 4 : 0));
13713 return true;
13714 }
13715
13716 /* Called from a peephole2 expander to turn a sequence of stores that are
13717 preceded by constant loads into an STM instruction. OPERANDS are the
13718 operands found by the peephole matcher; NOPS indicates how many
13719 separate stores we are trying to combine; there are 2 * NOPS
13720 instructions in the peephole.
13721 Returns true iff we could generate a new instruction. */
13722
13723 bool
13724 gen_const_stm_seq (rtx *operands, int nops)
13725 {
13726 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13727 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13728 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13729 rtx mems[MAX_LDM_STM_OPS];
13730 int base_reg;
13731 rtx base_reg_rtx;
13732 HOST_WIDE_INT offset;
13733 int write_back = FALSE;
13734 int stm_case;
13735 rtx addr;
13736 bool base_reg_dies;
13737 int i, j;
13738 HARD_REG_SET allocated;
13739
13740 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13741 mem_order, &base_reg, &offset, false);
13742
13743 if (stm_case == 0)
13744 return false;
13745
13746 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13747
13748 /* If the same register is used more than once, try to find a free
13749 register. */
13750 CLEAR_HARD_REG_SET (allocated);
13751 for (i = 0; i < nops; i++)
13752 {
13753 for (j = i + 1; j < nops; j++)
13754 if (regs[i] == regs[j])
13755 {
13756 rtx t = peep2_find_free_register (0, nops * 2,
13757 TARGET_THUMB1 ? "l" : "r",
13758 SImode, &allocated);
13759 if (t == NULL_RTX)
13760 return false;
13761 reg_rtxs[i] = t;
13762 regs[i] = REGNO (t);
13763 }
13764 }
13765
13766 /* Compute an ordering that maps the register numbers to an ascending
13767 sequence. */
13768 reg_order[0] = 0;
13769 for (i = 0; i < nops; i++)
13770 if (regs[i] < regs[reg_order[0]])
13771 reg_order[0] = i;
13772
13773 for (i = 1; i < nops; i++)
13774 {
13775 int this_order = reg_order[i - 1];
13776 for (j = 0; j < nops; j++)
13777 if (regs[j] > regs[reg_order[i - 1]]
13778 && (this_order == reg_order[i - 1]
13779 || regs[j] < regs[this_order]))
13780 this_order = j;
13781 reg_order[i] = this_order;
13782 }
13783
13784 /* Ensure that registers that must be live after the instruction end
13785 up with the correct value. */
13786 for (i = 0; i < nops; i++)
13787 {
13788 int this_order = reg_order[i];
13789 if ((this_order != mem_order[i]
13790 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13791 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13792 return false;
13793 }
13794
13795 /* Load the constants. */
13796 for (i = 0; i < nops; i++)
13797 {
13798 rtx op = operands[2 * nops + mem_order[i]];
13799 sorted_regs[i] = regs[reg_order[i]];
13800 emit_move_insn (reg_rtxs[reg_order[i]], op);
13801 }
13802
13803 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13804
13805 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13806 if (TARGET_THUMB1)
13807 {
13808 gcc_assert (base_reg_dies);
13809 write_back = TRUE;
13810 }
13811
13812 if (stm_case == 5)
13813 {
13814 gcc_assert (base_reg_dies);
13815 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13816 offset = 0;
13817 }
13818
13819 addr = plus_constant (Pmode, base_reg_rtx, offset);
13820
13821 for (i = 0; i < nops; i++)
13822 {
13823 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13824 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13825 SImode, addr, 0);
13826 }
13827 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13828 write_back ? offset + i * 4 : 0));
13829 return true;
13830 }
13831
13832 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13833 unaligned copies on processors which support unaligned semantics for those
13834 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13835 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13836 An interleave factor of 1 (the minimum) will perform no interleaving.
13837 Load/store multiple are used for aligned addresses where possible. */
13838
13839 static void
13840 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13841 HOST_WIDE_INT length,
13842 unsigned int interleave_factor)
13843 {
13844 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13845 int *regnos = XALLOCAVEC (int, interleave_factor);
13846 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13847 HOST_WIDE_INT i, j;
13848 HOST_WIDE_INT remaining = length, words;
13849 rtx halfword_tmp = NULL, byte_tmp = NULL;
13850 rtx dst, src;
13851 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13852 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13853 HOST_WIDE_INT srcoffset, dstoffset;
13854 HOST_WIDE_INT src_autoinc, dst_autoinc;
13855 rtx mem, addr;
13856
13857 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13858
13859 /* Use hard registers if we have aligned source or destination so we can use
13860 load/store multiple with contiguous registers. */
13861 if (dst_aligned || src_aligned)
13862 for (i = 0; i < interleave_factor; i++)
13863 regs[i] = gen_rtx_REG (SImode, i);
13864 else
13865 for (i = 0; i < interleave_factor; i++)
13866 regs[i] = gen_reg_rtx (SImode);
13867
13868 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13869 src = copy_addr_to_reg (XEXP (srcbase, 0));
13870
13871 srcoffset = dstoffset = 0;
13872
13873 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13874 For copying the last bytes we want to subtract this offset again. */
13875 src_autoinc = dst_autoinc = 0;
13876
13877 for (i = 0; i < interleave_factor; i++)
13878 regnos[i] = i;
13879
13880 /* Copy BLOCK_SIZE_BYTES chunks. */
13881
13882 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13883 {
13884 /* Load words. */
13885 if (src_aligned && interleave_factor > 1)
13886 {
13887 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13888 TRUE, srcbase, &srcoffset));
13889 src_autoinc += UNITS_PER_WORD * interleave_factor;
13890 }
13891 else
13892 {
13893 for (j = 0; j < interleave_factor; j++)
13894 {
13895 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13896 - src_autoinc));
13897 mem = adjust_automodify_address (srcbase, SImode, addr,
13898 srcoffset + j * UNITS_PER_WORD);
13899 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13900 }
13901 srcoffset += block_size_bytes;
13902 }
13903
13904 /* Store words. */
13905 if (dst_aligned && interleave_factor > 1)
13906 {
13907 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13908 TRUE, dstbase, &dstoffset));
13909 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13910 }
13911 else
13912 {
13913 for (j = 0; j < interleave_factor; j++)
13914 {
13915 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13916 - dst_autoinc));
13917 mem = adjust_automodify_address (dstbase, SImode, addr,
13918 dstoffset + j * UNITS_PER_WORD);
13919 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13920 }
13921 dstoffset += block_size_bytes;
13922 }
13923
13924 remaining -= block_size_bytes;
13925 }
13926
13927 /* Copy any whole words left (note these aren't interleaved with any
13928 subsequent halfword/byte load/stores in the interests of simplicity). */
13929
13930 words = remaining / UNITS_PER_WORD;
13931
13932 gcc_assert (words < interleave_factor);
13933
13934 if (src_aligned && words > 1)
13935 {
13936 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13937 &srcoffset));
13938 src_autoinc += UNITS_PER_WORD * words;
13939 }
13940 else
13941 {
13942 for (j = 0; j < words; j++)
13943 {
13944 addr = plus_constant (Pmode, src,
13945 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13946 mem = adjust_automodify_address (srcbase, SImode, addr,
13947 srcoffset + j * UNITS_PER_WORD);
13948 if (src_aligned)
13949 emit_move_insn (regs[j], mem);
13950 else
13951 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13952 }
13953 srcoffset += words * UNITS_PER_WORD;
13954 }
13955
13956 if (dst_aligned && words > 1)
13957 {
13958 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13959 &dstoffset));
13960 dst_autoinc += words * UNITS_PER_WORD;
13961 }
13962 else
13963 {
13964 for (j = 0; j < words; j++)
13965 {
13966 addr = plus_constant (Pmode, dst,
13967 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13968 mem = adjust_automodify_address (dstbase, SImode, addr,
13969 dstoffset + j * UNITS_PER_WORD);
13970 if (dst_aligned)
13971 emit_move_insn (mem, regs[j]);
13972 else
13973 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13974 }
13975 dstoffset += words * UNITS_PER_WORD;
13976 }
13977
13978 remaining -= words * UNITS_PER_WORD;
13979
13980 gcc_assert (remaining < 4);
13981
13982 /* Copy a halfword if necessary. */
13983
13984 if (remaining >= 2)
13985 {
13986 halfword_tmp = gen_reg_rtx (SImode);
13987
13988 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13989 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13990 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13991
13992 /* Either write out immediately, or delay until we've loaded the last
13993 byte, depending on interleave factor. */
13994 if (interleave_factor == 1)
13995 {
13996 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13997 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13998 emit_insn (gen_unaligned_storehi (mem,
13999 gen_lowpart (HImode, halfword_tmp)));
14000 halfword_tmp = NULL;
14001 dstoffset += 2;
14002 }
14003
14004 remaining -= 2;
14005 srcoffset += 2;
14006 }
14007
14008 gcc_assert (remaining < 2);
14009
14010 /* Copy last byte. */
14011
14012 if ((remaining & 1) != 0)
14013 {
14014 byte_tmp = gen_reg_rtx (SImode);
14015
14016 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14017 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14018 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14019
14020 if (interleave_factor == 1)
14021 {
14022 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14023 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14024 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14025 byte_tmp = NULL;
14026 dstoffset++;
14027 }
14028
14029 remaining--;
14030 srcoffset++;
14031 }
14032
14033 /* Store last halfword if we haven't done so already. */
14034
14035 if (halfword_tmp)
14036 {
14037 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14038 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14039 emit_insn (gen_unaligned_storehi (mem,
14040 gen_lowpart (HImode, halfword_tmp)));
14041 dstoffset += 2;
14042 }
14043
14044 /* Likewise for last byte. */
14045
14046 if (byte_tmp)
14047 {
14048 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14049 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14050 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14051 dstoffset++;
14052 }
14053
14054 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14055 }
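/* As an illustration (sizes chosen arbitrarily): a copy with LENGTH == 23
   and INTERLEAVE_FACTOR == 4 proceeds as one 16-byte block (four word
   loads followed by four word stores, using ldm/stm on whichever side is
   word-aligned), then one leftover whole word, then a halfword, then a
   single byte, leaving SRCOFFSET == DSTOFFSET == 23 at the end.  */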
14056
14057 /* From mips_adjust_block_mem:
14058
14059 Helper function for doing a loop-based block operation on memory
14060 reference MEM. Each iteration of the loop will operate on LENGTH
14061 bytes of MEM.
14062
14063 Create a new base register for use within the loop and point it to
14064 the start of MEM. Create a new memory reference that uses this
14065 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14066
14067 static void
14068 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14069 rtx *loop_mem)
14070 {
14071 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14072
14073 /* Although the new mem does not refer to a known location,
14074 it does keep up to LENGTH bytes of alignment. */
14075 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14076 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14077 }
14078
14079 /* From mips_block_move_loop:
14080
14081 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14082 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14083 the memory regions do not overlap. */
14084
14085 static void
14086 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14087 unsigned int interleave_factor,
14088 HOST_WIDE_INT bytes_per_iter)
14089 {
14090 rtx src_reg, dest_reg, final_src, test;
14091 HOST_WIDE_INT leftover;
14092
14093 leftover = length % bytes_per_iter;
14094 length -= leftover;
14095
14096 /* Create registers and memory references for use within the loop. */
14097 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14098 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14099
14100 /* Calculate the value that SRC_REG should have after the last iteration of
14101 the loop. */
14102 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14103 0, 0, OPTAB_WIDEN);
14104
14105 /* Emit the start of the loop. */
14106 rtx_code_label *label = gen_label_rtx ();
14107 emit_label (label);
14108
14109 /* Emit the loop body. */
14110 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14111 interleave_factor);
14112
14113 /* Move on to the next block. */
14114 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14115 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14116
14117 /* Emit the loop condition. */
14118 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14119 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14120
14121 /* Mop up any left-over bytes. */
14122 if (leftover)
14123 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14124 }
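/* For example (numbers are illustrative): LENGTH == 100 with
   BYTES_PER_ITER == 16 strips LEFTOVER == 4 first, runs the loop until
   SRC_REG reaches its start value plus 96 (six iterations of 16 bytes),
   and then copies the remaining 4 bytes with a straight-line sequence.  */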
14125
14126 /* Emit a block move when either the source or destination is unaligned (not
14127 aligned to a four-byte boundary). This may need further tuning depending on
14128 core type, optimize_size setting, etc. */
14129
14130 static int
14131 arm_movmemqi_unaligned (rtx *operands)
14132 {
14133 HOST_WIDE_INT length = INTVAL (operands[2]);
14134
14135 if (optimize_size)
14136 {
14137 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14138 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14139 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14140 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14141 or dst_aligned though: allow more interleaving in those cases since the
14142 resulting code can be smaller. */
14143 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14144 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14145
14146 if (length > 12)
14147 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14148 interleave_factor, bytes_per_iter);
14149 else
14150 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14151 interleave_factor);
14152 }
14153 else
14154 {
14155 /* Note that the loop created by arm_block_move_unaligned_loop may be
14156 subject to loop unrolling, which makes tuning this condition a little
14157 redundant. */
14158 if (length > 32)
14159 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14160 else
14161 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14162 }
14163
14164 return 1;
14165 }
14166
14167 int
14168 arm_gen_movmemqi (rtx *operands)
14169 {
14170 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14171 HOST_WIDE_INT srcoffset, dstoffset;
14172 rtx src, dst, srcbase, dstbase;
14173 rtx part_bytes_reg = NULL;
14174 rtx mem;
14175
14176 if (!CONST_INT_P (operands[2])
14177 || !CONST_INT_P (operands[3])
14178 || INTVAL (operands[2]) > 64)
14179 return 0;
14180
14181 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14182 return arm_movmemqi_unaligned (operands);
14183
14184 if (INTVAL (operands[3]) & 3)
14185 return 0;
14186
14187 dstbase = operands[0];
14188 srcbase = operands[1];
14189
14190 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14191 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14192
14193 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14194 out_words_to_go = INTVAL (operands[2]) / 4;
14195 last_bytes = INTVAL (operands[2]) & 3;
14196 dstoffset = srcoffset = 0;
14197
14198 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14199 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14200
14201 while (in_words_to_go >= 2)
14202 {
14203 if (in_words_to_go > 4)
14204 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14205 TRUE, srcbase, &srcoffset));
14206 else
14207 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14208 src, FALSE, srcbase,
14209 &srcoffset));
14210
14211 if (out_words_to_go)
14212 {
14213 if (out_words_to_go > 4)
14214 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14215 TRUE, dstbase, &dstoffset));
14216 else if (out_words_to_go != 1)
14217 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14218 out_words_to_go, dst,
14219 (last_bytes == 0
14220 ? FALSE : TRUE),
14221 dstbase, &dstoffset));
14222 else
14223 {
14224 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14225 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14226 if (last_bytes != 0)
14227 {
14228 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14229 dstoffset += 4;
14230 }
14231 }
14232 }
14233
14234 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14235 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14236 }
14237
14238 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14239 if (out_words_to_go)
14240 {
14241 rtx sreg;
14242
14243 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14244 sreg = copy_to_reg (mem);
14245
14246 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14247 emit_move_insn (mem, sreg);
14248 in_words_to_go--;
14249
14250 gcc_assert (!in_words_to_go); /* Sanity check */
14251 }
14252
14253 if (in_words_to_go)
14254 {
14255 gcc_assert (in_words_to_go > 0);
14256
14257 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14258 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14259 }
14260
14261 gcc_assert (!last_bytes || part_bytes_reg);
14262
14263 if (BYTES_BIG_ENDIAN && last_bytes)
14264 {
14265 rtx tmp = gen_reg_rtx (SImode);
14266
14267 /* The bytes we want are in the top end of the word. */
14268 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14269 GEN_INT (8 * (4 - last_bytes))));
14270 part_bytes_reg = tmp;
14271
14272 while (last_bytes)
14273 {
14274 mem = adjust_automodify_address (dstbase, QImode,
14275 plus_constant (Pmode, dst,
14276 last_bytes - 1),
14277 dstoffset + last_bytes - 1);
14278 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14279
14280 if (--last_bytes)
14281 {
14282 tmp = gen_reg_rtx (SImode);
14283 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14284 part_bytes_reg = tmp;
14285 }
14286 }
14287
14288 }
14289 else
14290 {
14291 if (last_bytes > 1)
14292 {
14293 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14294 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14295 last_bytes -= 2;
14296 if (last_bytes)
14297 {
14298 rtx tmp = gen_reg_rtx (SImode);
14299 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14300 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14301 part_bytes_reg = tmp;
14302 dstoffset += 2;
14303 }
14304 }
14305
14306 if (last_bytes)
14307 {
14308 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14309 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14310 }
14311 }
14312
14313 return 1;
14314 }
14315
14316 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14317 by mode size. */
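/* E.g. given an SImode MEM at address [Rn], this returns an SImode MEM at
   [Rn + 4] (illustration).  */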
14318 inline static rtx
14319 next_consecutive_mem (rtx mem)
14320 {
14321 machine_mode mode = GET_MODE (mem);
14322 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14323 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14324
14325 return adjust_automodify_address (mem, mode, addr, offset);
14326 }
14327
14328 /* Copy using LDRD/STRD instructions whenever possible.
14329 Returns true upon success. */
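/* For a 16-byte copy with both ends word-aligned this amounts roughly to
   (illustrative sketch; the actual registers are pseudos):
	ldrd	r0, r1, [src]
	strd	r0, r1, [dst]
	ldrd	r0, r1, [src, #8]
	strd	r0, r1, [dst, #8]  */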
14330 bool
14331 gen_movmem_ldrd_strd (rtx *operands)
14332 {
14333 unsigned HOST_WIDE_INT len;
14334 HOST_WIDE_INT align;
14335 rtx src, dst, base;
14336 rtx reg0;
14337 bool src_aligned, dst_aligned;
14338 bool src_volatile, dst_volatile;
14339
14340 gcc_assert (CONST_INT_P (operands[2]));
14341 gcc_assert (CONST_INT_P (operands[3]));
14342
14343 len = UINTVAL (operands[2]);
14344 if (len > 64)
14345 return false;
14346
14347 /* Maximum alignment we can assume for both src and dst buffers. */
14348 align = INTVAL (operands[3]);
14349
14350 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14351 return false;
14352
14353 /* Place src and dst addresses in registers
14354 and update the corresponding mem rtx. */
14355 dst = operands[0];
14356 dst_volatile = MEM_VOLATILE_P (dst);
14357 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14358 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14359 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14360
14361 src = operands[1];
14362 src_volatile = MEM_VOLATILE_P (src);
14363 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14364 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14365 src = adjust_automodify_address (src, VOIDmode, base, 0);
14366
14367 if (!unaligned_access && !(src_aligned && dst_aligned))
14368 return false;
14369
14370 if (src_volatile || dst_volatile)
14371 return false;
14372
14373 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14374 if (!(dst_aligned || src_aligned))
14375 return arm_gen_movmemqi (operands);
14376
14377 /* If either the src or dst is unaligned we'll be accessing it as pairs
14378 of unaligned SImode accesses. Otherwise we can generate DImode
14379 ldrd/strd instructions. */
14380 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14381 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14382
14383 while (len >= 8)
14384 {
14385 len -= 8;
14386 reg0 = gen_reg_rtx (DImode);
14387 rtx low_reg = NULL_RTX;
14388 rtx hi_reg = NULL_RTX;
14389
14390 if (!src_aligned || !dst_aligned)
14391 {
14392 low_reg = gen_lowpart (SImode, reg0);
14393 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14394 }
14395 if (src_aligned)
14396 emit_move_insn (reg0, src);
14397 else
14398 {
14399 emit_insn (gen_unaligned_loadsi (low_reg, src));
14400 src = next_consecutive_mem (src);
14401 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14402 }
14403
14404 if (dst_aligned)
14405 emit_move_insn (dst, reg0);
14406 else
14407 {
14408 emit_insn (gen_unaligned_storesi (dst, low_reg));
14409 dst = next_consecutive_mem (dst);
14410 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14411 }
14412
14413 src = next_consecutive_mem (src);
14414 dst = next_consecutive_mem (dst);
14415 }
14416
14417 gcc_assert (len < 8);
14418 if (len >= 4)
14419 {
14420 /* More than a word but less than a double-word to copy. Copy a word. */
14421 reg0 = gen_reg_rtx (SImode);
14422 src = adjust_address (src, SImode, 0);
14423 dst = adjust_address (dst, SImode, 0);
14424 if (src_aligned)
14425 emit_move_insn (reg0, src);
14426 else
14427 emit_insn (gen_unaligned_loadsi (reg0, src));
14428
14429 if (dst_aligned)
14430 emit_move_insn (dst, reg0);
14431 else
14432 emit_insn (gen_unaligned_storesi (dst, reg0));
14433
14434 src = next_consecutive_mem (src);
14435 dst = next_consecutive_mem (dst);
14436 len -= 4;
14437 }
14438
14439 if (len == 0)
14440 return true;
14441
14442 /* Copy the remaining bytes. */
14443 if (len >= 2)
14444 {
14445 dst = adjust_address (dst, HImode, 0);
14446 src = adjust_address (src, HImode, 0);
14447 reg0 = gen_reg_rtx (SImode);
14448 if (src_aligned)
14449 emit_insn (gen_zero_extendhisi2 (reg0, src));
14450 else
14451 emit_insn (gen_unaligned_loadhiu (reg0, src));
14452
14453 if (dst_aligned)
14454 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14455 else
14456 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14457
14458 src = next_consecutive_mem (src);
14459 dst = next_consecutive_mem (dst);
14460 if (len == 2)
14461 return true;
14462 }
14463
14464 dst = adjust_address (dst, QImode, 0);
14465 src = adjust_address (src, QImode, 0);
14466 reg0 = gen_reg_rtx (QImode);
14467 emit_move_insn (reg0, src);
14468 emit_move_insn (dst, reg0);
14469 return true;
14470 }
14471
14472 /* Select a dominance comparison mode if possible for a test of the general
14473 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14474 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14475 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14476 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14477 In all cases OP will be either EQ or NE, but we don't need to know which
14478 here. If we are unable to support a dominance comparison we return
14479 CC mode. This will then fail to match for the RTL expressions that
14480 generate this call. */
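/* For example (an illustration only): with X = (eq a b), Y = (ge c d) and
   COND_OR == DOM_CC_X_OR_Y, EQ being true implies GE, so the pair is
   representable and CC_DGEmode is returned; pairing (lt a b) with
   (gtu c d) is not representable and yields CCmode.  */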
14481 machine_mode
14482 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14483 {
14484 enum rtx_code cond1, cond2;
14485 int swapped = 0;
14486
14487 /* Currently we will probably get the wrong result if the individual
14488 comparisons are not simple. This also ensures that it is safe to
14489 reverse a comparison if necessary. */
14490 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14491 != CCmode)
14492 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14493 != CCmode))
14494 return CCmode;
14495
14496 /* The if_then_else variant of this tests the second condition if the
14497 first passes, but is true if the first fails. Reverse the first
14498 condition to get a true "inclusive-or" expression. */
14499 if (cond_or == DOM_CC_NX_OR_Y)
14500 cond1 = reverse_condition (cond1);
14501
14502 /* If the comparisons are not equal, and one doesn't dominate the other,
14503 then we can't do this. */
14504 if (cond1 != cond2
14505 && !comparison_dominates_p (cond1, cond2)
14506 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14507 return CCmode;
14508
14509 if (swapped)
14510 std::swap (cond1, cond2);
14511
14512 switch (cond1)
14513 {
14514 case EQ:
14515 if (cond_or == DOM_CC_X_AND_Y)
14516 return CC_DEQmode;
14517
14518 switch (cond2)
14519 {
14520 case EQ: return CC_DEQmode;
14521 case LE: return CC_DLEmode;
14522 case LEU: return CC_DLEUmode;
14523 case GE: return CC_DGEmode;
14524 case GEU: return CC_DGEUmode;
14525 default: gcc_unreachable ();
14526 }
14527
14528 case LT:
14529 if (cond_or == DOM_CC_X_AND_Y)
14530 return CC_DLTmode;
14531
14532 switch (cond2)
14533 {
14534 case LT:
14535 return CC_DLTmode;
14536 case LE:
14537 return CC_DLEmode;
14538 case NE:
14539 return CC_DNEmode;
14540 default:
14541 gcc_unreachable ();
14542 }
14543
14544 case GT:
14545 if (cond_or == DOM_CC_X_AND_Y)
14546 return CC_DGTmode;
14547
14548 switch (cond2)
14549 {
14550 case GT:
14551 return CC_DGTmode;
14552 case GE:
14553 return CC_DGEmode;
14554 case NE:
14555 return CC_DNEmode;
14556 default:
14557 gcc_unreachable ();
14558 }
14559
14560 case LTU:
14561 if (cond_or == DOM_CC_X_AND_Y)
14562 return CC_DLTUmode;
14563
14564 switch (cond2)
14565 {
14566 case LTU:
14567 return CC_DLTUmode;
14568 case LEU:
14569 return CC_DLEUmode;
14570 case NE:
14571 return CC_DNEmode;
14572 default:
14573 gcc_unreachable ();
14574 }
14575
14576 case GTU:
14577 if (cond_or == DOM_CC_X_AND_Y)
14578 return CC_DGTUmode;
14579
14580 switch (cond2)
14581 {
14582 case GTU:
14583 return CC_DGTUmode;
14584 case GEU:
14585 return CC_DGEUmode;
14586 case NE:
14587 return CC_DNEmode;
14588 default:
14589 gcc_unreachable ();
14590 }
14591
14592 /* The remaining cases only occur when both comparisons are the
14593 same. */
14594 case NE:
14595 gcc_assert (cond1 == cond2);
14596 return CC_DNEmode;
14597
14598 case LE:
14599 gcc_assert (cond1 == cond2);
14600 return CC_DLEmode;
14601
14602 case GE:
14603 gcc_assert (cond1 == cond2);
14604 return CC_DGEmode;
14605
14606 case LEU:
14607 gcc_assert (cond1 == cond2);
14608 return CC_DLEUmode;
14609
14610 case GEU:
14611 gcc_assert (cond1 == cond2);
14612 return CC_DGEUmode;
14613
14614 default:
14615 gcc_unreachable ();
14616 }
14617 }
14618
14619 machine_mode
14620 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14621 {
14622 /* All floating point compares return CCFP for equality and unordered
14623 comparisons, and CCFPE for the ordered inequalities (LT/LE/GT/GE). */
14624 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14625 {
14626 switch (op)
14627 {
14628 case EQ:
14629 case NE:
14630 case UNORDERED:
14631 case ORDERED:
14632 case UNLT:
14633 case UNLE:
14634 case UNGT:
14635 case UNGE:
14636 case UNEQ:
14637 case LTGT:
14638 return CCFPmode;
14639
14640 case LT:
14641 case LE:
14642 case GT:
14643 case GE:
14644 return CCFPEmode;
14645
14646 default:
14647 gcc_unreachable ();
14648 }
14649 }
14650
14651 /* A compare with a shifted operand. Because of canonicalization, the
14652 comparison will have to be swapped when we emit the assembler. */
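  /* For instance (sketch): (gt (ashift r1 (const_int 2)) r0) must be output
     as "cmp r0, r1, lsl #2" and tested with the swapped condition (lt).  */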
14653 if (GET_MODE (y) == SImode
14654 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14655 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14656 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14657 || GET_CODE (x) == ROTATERT))
14658 return CC_SWPmode;
14659
14660 /* This operation is performed swapped, but since we only rely on the Z
14661 flag we don't need an additional mode. */
14662 if (GET_MODE (y) == SImode
14663 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14664 && GET_CODE (x) == NEG
14665 && (op == EQ || op == NE))
14666 return CC_Zmode;
14667
14668 /* This is a special case that is used by combine to allow a
14669 comparison of a shifted byte load to be split into a zero-extend
14670 followed by a comparison of the shifted integer (only valid for
14671 equalities and unsigned inequalities). */
14672 if (GET_MODE (x) == SImode
14673 && GET_CODE (x) == ASHIFT
14674 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14675 && GET_CODE (XEXP (x, 0)) == SUBREG
14676 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14677 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14678 && (op == EQ || op == NE
14679 || op == GEU || op == GTU || op == LTU || op == LEU)
14680 && CONST_INT_P (y))
14681 return CC_Zmode;
14682
14683 /* A construct for a conditional compare, if the false arm contains
14684 0, then both conditions must be true, otherwise either condition
14685 must be true. Not all conditions are possible, so CCmode is
14686 returned if it can't be done. */
14687 if (GET_CODE (x) == IF_THEN_ELSE
14688 && (XEXP (x, 2) == const0_rtx
14689 || XEXP (x, 2) == const1_rtx)
14690 && COMPARISON_P (XEXP (x, 0))
14691 && COMPARISON_P (XEXP (x, 1)))
14692 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14693 INTVAL (XEXP (x, 2)));
14694
14695 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14696 if (GET_CODE (x) == AND
14697 && (op == EQ || op == NE)
14698 && COMPARISON_P (XEXP (x, 0))
14699 && COMPARISON_P (XEXP (x, 1)))
14700 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14701 DOM_CC_X_AND_Y);
14702
14703 if (GET_CODE (x) == IOR
14704 && (op == EQ || op == NE)
14705 && COMPARISON_P (XEXP (x, 0))
14706 && COMPARISON_P (XEXP (x, 1)))
14707 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14708 DOM_CC_X_OR_Y);
14709
14710 /* An operation (on Thumb) where we want to test for a single bit.
14711 This is done by shifting that bit up into the top bit of a
14712 scratch register; we can then branch on the sign bit. */
14713 if (TARGET_THUMB1
14714 && GET_MODE (x) == SImode
14715 && (op == EQ || op == NE)
14716 && GET_CODE (x) == ZERO_EXTRACT
14717 && XEXP (x, 1) == const1_rtx)
14718 return CC_Nmode;
14719
14720 /* An operation that sets the condition codes as a side-effect, the
14721 V flag is not set correctly, so we can only use comparisons where
14722 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14723 instead.) */
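  /* E.g. (lt (plus a b) (const_int 0)) can be tested as "adds rT, ra, rb"
     followed by a branch on "mi" (illustrative sketch).  */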
14724 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14725 if (GET_MODE (x) == SImode
14726 && y == const0_rtx
14727 && (op == EQ || op == NE || op == LT || op == GE)
14728 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14729 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14730 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14731 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14732 || GET_CODE (x) == LSHIFTRT
14733 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14734 || GET_CODE (x) == ROTATERT
14735 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14736 return CC_NOOVmode;
14737
14738 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14739 return CC_Zmode;
14740
14741 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14742 && GET_CODE (x) == PLUS
14743 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14744 return CC_Cmode;
14745
14746 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14747 {
14748 switch (op)
14749 {
14750 case EQ:
14751 case NE:
14752 /* A DImode comparison against zero can be implemented by
14753 or'ing the two halves together. */
14754 if (y == const0_rtx)
14755 return CC_Zmode;
14756
14757 /* We can do an equality test in three Thumb instructions. */
14758 if (!TARGET_32BIT)
14759 return CC_Zmode;
14760
14761 /* FALLTHROUGH */
14762
14763 case LTU:
14764 case LEU:
14765 case GTU:
14766 case GEU:
14767 /* DImode unsigned comparisons can be implemented by cmp +
14768 cmpeq without a scratch register. Not worth doing in
14769 Thumb-2. */
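	  /* In ARM state this is roughly (illustrative sketch):
		cmp	xhi, yhi
		cmpeq	xlo, ylo
	     leaving C and Z describing the full 64-bit unsigned result,
	     hence CC_CZmode.  */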
14770 if (TARGET_32BIT)
14771 return CC_CZmode;
14772
14773 /* FALLTHROUGH */
14774
14775 case LT:
14776 case LE:
14777 case GT:
14778 case GE:
14779 /* DImode signed and unsigned comparisons can be implemented
14780 by cmp + sbcs with a scratch register, but that does not
14781 set the Z flag - we must reverse GT/LE/GTU/LEU. */
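	  /* Roughly (illustrative sketch):
		cmp	xlo, ylo
		sbcs	tmp, xhi, yhi
	     which leaves N, C and V describing the 64-bit result but not Z,
	     hence CC_NCVmode and the reversal noted above.  */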
14782 gcc_assert (op != EQ && op != NE);
14783 return CC_NCVmode;
14784
14785 default:
14786 gcc_unreachable ();
14787 }
14788 }
14789
14790 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14791 return GET_MODE (x);
14792
14793 return CCmode;
14794 }
14795
14796 /* X and Y are two things to compare using CODE. Emit the compare insn and
14797 return the rtx for the CC register in the proper mode. SCRATCH is a
14798 scratch register that may be needed for DImode comparisons. */
14799 rtx
14800 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14801 {
14802 machine_mode mode;
14803 rtx cc_reg;
14804 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14805
14806 /* We might have X as a constant, Y as a register because of the predicates
14807 used for cmpdi. If so, force X to a register here. */
14808 if (dimode_comparison && !REG_P (x))
14809 x = force_reg (DImode, x);
14810
14811 mode = SELECT_CC_MODE (code, x, y);
14812 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14813
14814 if (dimode_comparison
14815 && mode != CC_CZmode)
14816 {
14817 rtx clobber, set;
14818
14819 /* To compare two non-zero values for equality, XOR them and
14820 then compare against zero. Not used for ARM mode; there
14821 CC_CZmode is cheaper. */
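	  /* I.e. an equality test of two DImode values, (eq x y), is
	     rewritten below as ((x ^ y) == 0) so that only the Z flag is
	     needed (illustrative restatement).  */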
14822 if (mode == CC_Zmode && y != const0_rtx)
14823 {
14824 gcc_assert (!reload_completed);
14825 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14826 y = const0_rtx;
14827 }
14828
14829 /* A scratch register is required. */
14830 if (reload_completed)
14831 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14832 else
14833 scratch = gen_rtx_SCRATCH (SImode);
14834
14835 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14836 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14837 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14838 }
14839 else
14840 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14841
14842 return cc_reg;
14843 }
14844
14845 /* Generate a sequence of insns that will generate the correct return
14846 address mask depending on the physical architecture that the program
14847 is running on. */
14848 rtx
14849 arm_gen_return_addr_mask (void)
14850 {
14851 rtx reg = gen_reg_rtx (Pmode);
14852
14853 emit_insn (gen_return_addr_mask (reg));
14854 return reg;
14855 }
14856
14857 void
14858 arm_reload_in_hi (rtx *operands)
14859 {
14860 rtx ref = operands[1];
14861 rtx base, scratch;
14862 HOST_WIDE_INT offset = 0;
14863
14864 if (GET_CODE (ref) == SUBREG)
14865 {
14866 offset = SUBREG_BYTE (ref);
14867 ref = SUBREG_REG (ref);
14868 }
14869
14870 if (REG_P (ref))
14871 {
14872 /* We have a pseudo which has been spilt onto the stack; there
14873 are two cases here: the first where there is a simple
14874 stack-slot replacement and a second where the stack-slot is
14875 out of range, or is used as a subreg. */
14876 if (reg_equiv_mem (REGNO (ref)))
14877 {
14878 ref = reg_equiv_mem (REGNO (ref));
14879 base = find_replacement (&XEXP (ref, 0));
14880 }
14881 else
14882 /* The slot is out of range, or was dressed up in a SUBREG. */
14883 base = reg_equiv_address (REGNO (ref));
14884
14885 /* PR 62554: If there is no equivalent memory location then just move
14886 the value as an SImode register move. This happens when the target
14887 architecture variant does not have an HImode register move. */
14888 if (base == NULL)
14889 {
14890 gcc_assert (REG_P (operands[0]));
14891 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14892 gen_rtx_SUBREG (SImode, ref, 0)));
14893 return;
14894 }
14895 }
14896 else
14897 base = find_replacement (&XEXP (ref, 0));
14898
14899 /* Handle the case where the address is too complex to be offset by 1. */
14900 if (GET_CODE (base) == MINUS
14901 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14902 {
14903 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14904
14905 emit_set_insn (base_plus, base);
14906 base = base_plus;
14907 }
14908 else if (GET_CODE (base) == PLUS)
14909 {
14910 /* The addend must be CONST_INT, or we would have dealt with it above. */
14911 HOST_WIDE_INT hi, lo;
14912
14913 offset += INTVAL (XEXP (base, 1));
14914 base = XEXP (base, 0);
14915
14916 /* Rework the address into a legal sequence of insns. */
14917 /* Valid range for lo is -4095 -> 4095 */
14918 lo = (offset >= 0
14919 ? (offset & 0xfff)
14920 : -((-offset) & 0xfff));
14921
14922 /* Corner case, if lo is the max offset then we would be out of range
14923 once we have added the additional 1 below, so bump the msb into the
14924 pre-loading insn(s). */
14925 if (lo == 4095)
14926 lo &= 0x7ff;
14927
14928 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14929 ^ (HOST_WIDE_INT) 0x80000000)
14930 - (HOST_WIDE_INT) 0x80000000);
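      /* Worked example (illustrative): offset = 0x1003 gives lo = 3 and
	 hi = 0x1000; offset = 0xfff hits the corner case above, so lo is
	 reduced to 0x7ff and hi becomes 0x800, keeping both offset and
	 offset + 1 addressable.  */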
14931
14932 gcc_assert (hi + lo == offset);
14933
14934 if (hi != 0)
14935 {
14936 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14937
14938 /* Get the base address; addsi3 knows how to handle constants
14939 that require more than one insn. */
14940 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14941 base = base_plus;
14942 offset = lo;
14943 }
14944 }
14945
14946 /* Operands[2] may overlap operands[0] (though it won't overlap
14947 operands[1]), that's why we asked for a DImode reg -- so we can
14948 use the bit that does not overlap. */
14949 if (REGNO (operands[2]) == REGNO (operands[0]))
14950 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14951 else
14952 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14953
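  /* The halfword load itself is synthesized as two byte loads plus a merge;
     for little-endian this is roughly (illustrative sketch):
	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8
     with the shift applied to the byte at OFFSET for big-endian.  */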
14954 emit_insn (gen_zero_extendqisi2 (scratch,
14955 gen_rtx_MEM (QImode,
14956 plus_constant (Pmode, base,
14957 offset))));
14958 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14959 gen_rtx_MEM (QImode,
14960 plus_constant (Pmode, base,
14961 offset + 1))));
14962 if (!BYTES_BIG_ENDIAN)
14963 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14964 gen_rtx_IOR (SImode,
14965 gen_rtx_ASHIFT
14966 (SImode,
14967 gen_rtx_SUBREG (SImode, operands[0], 0),
14968 GEN_INT (8)),
14969 scratch));
14970 else
14971 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14972 gen_rtx_IOR (SImode,
14973 gen_rtx_ASHIFT (SImode, scratch,
14974 GEN_INT (8)),
14975 gen_rtx_SUBREG (SImode, operands[0], 0)));
14976 }
14977
14978 /* Handle storing a half-word to memory during reload by synthesizing it as two
14979 byte stores. Take care not to clobber the input values until after we
14980 have moved them somewhere safe. This code assumes that if the DImode
14981 scratch in operands[2] overlaps either the input value or output address
14982 in some way, then that value must die in this insn (we absolutely need
14983 two scratch registers for some corner cases). */
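/* For the little-endian case the expansion amounts roughly to (sketch only;
   the actual operands are supplied by reload):
	strb	outval, [base, #offset]
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]
   with the two byte stores issued the other way round for big-endian.  */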
14984 void
14985 arm_reload_out_hi (rtx *operands)
14986 {
14987 rtx ref = operands[0];
14988 rtx outval = operands[1];
14989 rtx base, scratch;
14990 HOST_WIDE_INT offset = 0;
14991
14992 if (GET_CODE (ref) == SUBREG)
14993 {
14994 offset = SUBREG_BYTE (ref);
14995 ref = SUBREG_REG (ref);
14996 }
14997
14998 if (REG_P (ref))
14999 {
15000 /* We have a pseudo which has been spilt onto the stack; there
15001 are two cases here: the first where there is a simple
15002 stack-slot replacement and a second where the stack-slot is
15003 out of range, or is used as a subreg. */
15004 if (reg_equiv_mem (REGNO (ref)))
15005 {
15006 ref = reg_equiv_mem (REGNO (ref));
15007 base = find_replacement (&XEXP (ref, 0));
15008 }
15009 else
15010 /* The slot is out of range, or was dressed up in a SUBREG. */
15011 base = reg_equiv_address (REGNO (ref));
15012
15013 /* PR 62254: If there is no equivalent memory location then just move
15014 the value as an SImode register move. This happens when the target
15015 architecture variant does not have an HImode register move. */
15016 if (base == NULL)
15017 {
15018 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15019
15020 if (REG_P (outval))
15021 {
15022 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15023 gen_rtx_SUBREG (SImode, outval, 0)));
15024 }
15025 else /* SUBREG_P (outval) */
15026 {
15027 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15028 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15029 SUBREG_REG (outval)));
15030 else
15031 /* FIXME: Handle other cases ? */
15032 gcc_unreachable ();
15033 }
15034 return;
15035 }
15036 }
15037 else
15038 base = find_replacement (&XEXP (ref, 0));
15039
15040 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15041
15042 /* Handle the case where the address is too complex to be offset by 1. */
15043 if (GET_CODE (base) == MINUS
15044 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15045 {
15046 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15047
15048 /* Be careful not to destroy OUTVAL. */
15049 if (reg_overlap_mentioned_p (base_plus, outval))
15050 {
15051 /* Updating base_plus might destroy outval, see if we can
15052 swap the scratch and base_plus. */
15053 if (!reg_overlap_mentioned_p (scratch, outval))
15054 std::swap (scratch, base_plus);
15055 else
15056 {
15057 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15058
15059 /* Be conservative and copy OUTVAL into the scratch now;
15060 this should only be necessary if outval is a subreg
15061 of something larger than a word. */
15062 /* XXX Might this clobber base? I can't see how it can,
15063 since scratch is known to overlap with OUTVAL, and
15064 must be wider than a word. */
15065 emit_insn (gen_movhi (scratch_hi, outval));
15066 outval = scratch_hi;
15067 }
15068 }
15069
15070 emit_set_insn (base_plus, base);
15071 base = base_plus;
15072 }
15073 else if (GET_CODE (base) == PLUS)
15074 {
15075 /* The addend must be CONST_INT, or we would have dealt with it above. */
15076 HOST_WIDE_INT hi, lo;
15077
15078 offset += INTVAL (XEXP (base, 1));
15079 base = XEXP (base, 0);
15080
15081 /* Rework the address into a legal sequence of insns. */
15082 /* Valid range for lo is -4095 -> 4095 */
15083 lo = (offset >= 0
15084 ? (offset & 0xfff)
15085 : -((-offset) & 0xfff));
15086
15087 /* Corner case, if lo is the max offset then we would be out of range
15088 once we have added the additional 1 below, so bump the msb into the
15089 pre-loading insn(s). */
15090 if (lo == 4095)
15091 lo &= 0x7ff;
15092
15093 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15094 ^ (HOST_WIDE_INT) 0x80000000)
15095 - (HOST_WIDE_INT) 0x80000000);
15096
15097 gcc_assert (hi + lo == offset);
15098
15099 if (hi != 0)
15100 {
15101 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15102
15103 /* Be careful not to destroy OUTVAL. */
15104 if (reg_overlap_mentioned_p (base_plus, outval))
15105 {
15106 /* Updating base_plus might destroy outval, see if we
15107 can swap the scratch and base_plus. */
15108 if (!reg_overlap_mentioned_p (scratch, outval))
15109 std::swap (scratch, base_plus);
15110 else
15111 {
15112 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15113
15114 /* Be conservative and copy outval into scratch now;
15115 this should only be necessary if outval is a
15116 subreg of something larger than a word. */
15117 /* XXX Might this clobber base? I can't see how it
15118 can, since scratch is known to overlap with
15119 outval. */
15120 emit_insn (gen_movhi (scratch_hi, outval));
15121 outval = scratch_hi;
15122 }
15123 }
15124
15125 /* Get the base address; addsi3 knows how to handle constants
15126 that require more than one insn. */
15127 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15128 base = base_plus;
15129 offset = lo;
15130 }
15131 }
15132
15133 if (BYTES_BIG_ENDIAN)
15134 {
15135 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15136 plus_constant (Pmode, base,
15137 offset + 1)),
15138 gen_lowpart (QImode, outval)));
15139 emit_insn (gen_lshrsi3 (scratch,
15140 gen_rtx_SUBREG (SImode, outval, 0),
15141 GEN_INT (8)));
15142 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15143 offset)),
15144 gen_lowpart (QImode, scratch)));
15145 }
15146 else
15147 {
15148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15149 offset)),
15150 gen_lowpart (QImode, outval)));
15151 emit_insn (gen_lshrsi3 (scratch,
15152 gen_rtx_SUBREG (SImode, outval, 0),
15153 GEN_INT (8)));
15154 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15155 plus_constant (Pmode, base,
15156 offset + 1)),
15157 gen_lowpart (QImode, scratch)));
15158 }
15159 }
15160
15161 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15162 (padded to the size of a word) should be passed in a register. */
15163
15164 static bool
15165 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15166 {
15167 if (TARGET_AAPCS_BASED)
15168 return must_pass_in_stack_var_size (mode, type);
15169 else
15170 return must_pass_in_stack_var_size_or_pad (mode, type);
15171 }
15172
15173
15174 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15175 byte of a stack argument has useful data. For legacy APCS ABIs we use
15176 the default. For AAPCS based ABIs small aggregate types are placed
15177 in the lowest memory address. */
15178
15179 static pad_direction
15180 arm_function_arg_padding (machine_mode mode, const_tree type)
15181 {
15182 if (!TARGET_AAPCS_BASED)
15183 return default_function_arg_padding (mode, type);
15184
15185 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15186 return PAD_DOWNWARD;
15187
15188 return PAD_UPWARD;
15189 }
15190
15191
15192 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15193 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15194 register has useful data, and return the opposite if the most
15195 significant byte does. */
15196
15197 bool
15198 arm_pad_reg_upward (machine_mode mode,
15199 tree type, int first ATTRIBUTE_UNUSED)
15200 {
15201 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15202 {
15203 /* For AAPCS, small aggregates, small fixed-point types,
15204 and small complex types are always padded upwards. */
15205 if (type)
15206 {
15207 if ((AGGREGATE_TYPE_P (type)
15208 || TREE_CODE (type) == COMPLEX_TYPE
15209 || FIXED_POINT_TYPE_P (type))
15210 && int_size_in_bytes (type) <= 4)
15211 return true;
15212 }
15213 else
15214 {
15215 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15216 && GET_MODE_SIZE (mode) <= 4)
15217 return true;
15218 }
15219 }
15220
15221 /* Otherwise, use default padding. */
15222 return !BYTES_BIG_ENDIAN;
15223 }
15224
15225 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15226 assuming that the address in the base register is word aligned. */
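/* For instance (illustration): an offset of 1020 is accepted for Thumb-2
   (a multiple of 4 within +/-1020) but rejected for ARM, whose limit is
   +/-255; an offset of 254 is accepted for ARM but rejected for Thumb-2
   because it is not a multiple of 4.  */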
15227 bool
15228 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15229 {
15230 HOST_WIDE_INT max_offset;
15231
15232 /* Offset must be a multiple of 4 in Thumb mode. */
15233 if (TARGET_THUMB2 && ((offset & 3) != 0))
15234 return false;
15235
15236 if (TARGET_THUMB2)
15237 max_offset = 1020;
15238 else if (TARGET_ARM)
15239 max_offset = 255;
15240 else
15241 return false;
15242
15243 return ((offset <= max_offset) && (offset >= -max_offset));
15244 }
15245
15246 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15247 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15248 Assumes that the address in the base register RN is word aligned. Pattern
15249 guarantees that both memory accesses use the same base register,
15250 the offsets are constants within the range, and the gap between the offsets is 4.
15251 If reload is complete then check that the registers are legal. WBACK indicates whether
15252 address is updated. LOAD indicates whether memory access is load or store. */
15253 bool
15254 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15255 bool wback, bool load)
15256 {
15257 unsigned int t, t2, n;
15258
15259 if (!reload_completed)
15260 return true;
15261
15262 if (!offset_ok_for_ldrd_strd (offset))
15263 return false;
15264
15265 t = REGNO (rt);
15266 t2 = REGNO (rt2);
15267 n = REGNO (rn);
15268
15269 if ((TARGET_THUMB2)
15270 && ((wback && (n == t || n == t2))
15271 || (t == SP_REGNUM)
15272 || (t == PC_REGNUM)
15273 || (t2 == SP_REGNUM)
15274 || (t2 == PC_REGNUM)
15275 || (!load && (n == PC_REGNUM))
15276 || (load && (t == t2))
15277 /* Triggers Cortex-M3 LDRD errata. */
15278 || (!wback && load && fix_cm3_ldrd && (n == t))))
15279 return false;
15280
15281 if ((TARGET_ARM)
15282 && ((wback && (n == t || n == t2))
15283 || (t2 == PC_REGNUM)
15284 || (t % 2 != 0) /* First destination register is not even. */
15285 || (t2 != t + 1)
15286 /* PC can be used as a base register (for offset addressing only),
15287 but it is deprecated. */
15288 || (n == PC_REGNUM)))
15289 return false;
15290
15291 return true;
15292 }
15293
15294 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15295 operand MEM's address contains an immediate offset from the base
15296 register and has no side effects, in which case it sets BASE and
15297 OFFSET accordingly. */
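/* For example (illustration): (mem (plus (reg r4) (const_int 8))) yields
   BASE = r4 and OFFSET = 8, while a plain (mem (reg r4)) yields OFFSET = 0.  */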
15298 static bool
15299 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15300 {
15301 rtx addr;
15302
15303 gcc_assert (base != NULL && offset != NULL);
15304
15305 /* TODO: Handle more general memory operand patterns, such as
15306 PRE_DEC and PRE_INC. */
15307
15308 if (side_effects_p (mem))
15309 return false;
15310
15311 /* Can't deal with subregs. */
15312 if (GET_CODE (mem) == SUBREG)
15313 return false;
15314
15315 gcc_assert (MEM_P (mem));
15316
15317 *offset = const0_rtx;
15318
15319 addr = XEXP (mem, 0);
15320
15321 /* If addr isn't valid for DImode, then we can't handle it. */
15322 if (!arm_legitimate_address_p (DImode, addr,
15323 reload_in_progress || reload_completed))
15324 return false;
15325
15326 if (REG_P (addr))
15327 {
15328 *base = addr;
15329 return true;
15330 }
15331 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15332 {
15333 *base = XEXP (addr, 0);
15334 *offset = XEXP (addr, 1);
15335 return (REG_P (*base) && CONST_INT_P (*offset));
15336 }
15337
15338 return false;
15339 }
15340
15341 /* Called from a peephole2 to replace two word-size accesses with a
15342 single LDRD/STRD instruction. Returns true iff we can generate a
15343 new instruction sequence. That is, both accesses use the same base
15344 register and the gap between constant offsets is 4. This function
15345 may reorder its operands to match ldrd/strd RTL templates.
15346 OPERANDS are the operands found by the peephole matcher;
15347 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15348 corresponding memory operands. LOAD indicates whether the access
15349 is a load or a store. CONST_STORE indicates a store of constant
15350 integer values held in OPERANDS[4,5] and assumes that the pattern
15351 is four insns long, for the purpose of checking dead registers.
15352 COMMUTE indicates that register operands may be reordered. */
15353 bool
15354 gen_operands_ldrd_strd (rtx *operands, bool load,
15355 bool const_store, bool commute)
15356 {
15357 int nops = 2;
15358 HOST_WIDE_INT offsets[2], offset;
15359 rtx base = NULL_RTX;
15360 rtx cur_base, cur_offset, tmp;
15361 int i, gap;
15362 HARD_REG_SET regset;
15363
15364 gcc_assert (!const_store || !load);
15365 /* Check that the memory references are immediate offsets from the
15366 same base register. Extract the base register, the destination
15367 registers, and the corresponding memory offsets. */
15368 for (i = 0; i < nops; i++)
15369 {
15370 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15371 return false;
15372
15373 if (i == 0)
15374 base = cur_base;
15375 else if (REGNO (base) != REGNO (cur_base))
15376 return false;
15377
15378 offsets[i] = INTVAL (cur_offset);
15379 if (GET_CODE (operands[i]) == SUBREG)
15380 {
15381 tmp = SUBREG_REG (operands[i]);
15382 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15383 operands[i] = tmp;
15384 }
15385 }
15386
15387 /* Make sure there is no dependency between the individual loads. */
15388 if (load && REGNO (operands[0]) == REGNO (base))
15389 return false; /* RAW */
15390
15391 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15392 return false; /* WAW */
15393
15394 /* If the same input register is used in both stores
15395 when storing different constants, try to find a free register.
15396 For example, the code
15397 mov r0, 0
15398 str r0, [r2]
15399 mov r0, 1
15400 str r0, [r2, #4]
15401 can be transformed into
15402 mov r1, 0
15403 mov r0, 1
15404 strd r1, r0, [r2]
15405 in Thumb mode assuming that r1 is free.
15406 For ARM mode do the same but only if the starting register
15407 can be made to be even. */
15408 if (const_store
15409 && REGNO (operands[0]) == REGNO (operands[1])
15410 && INTVAL (operands[4]) != INTVAL (operands[5]))
15411 {
15412 if (TARGET_THUMB2)
15413 {
15414 CLEAR_HARD_REG_SET (regset);
15415 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15416 if (tmp == NULL_RTX)
15417 return false;
15418
15419 /* Use the new register in the first load to ensure that
15420 if the original input register is not dead after peephole,
15421 then it will have the correct constant value. */
15422 operands[0] = tmp;
15423 }
15424 else if (TARGET_ARM)
15425 {
15426 int regno = REGNO (operands[0]);
15427 if (!peep2_reg_dead_p (4, operands[0]))
15428 {
15429 /* When the input register is even and is not dead after the
15430 pattern, it has to hold the second constant but we cannot
15431 form a legal STRD in ARM mode with this register as the second
15432 register. */
15433 if (regno % 2 == 0)
15434 return false;
15435
15436 /* Is regno-1 free? */
15437 SET_HARD_REG_SET (regset);
15438 CLEAR_HARD_REG_BIT(regset, regno - 1);
15439 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15440 if (tmp == NULL_RTX)
15441 return false;
15442
15443 operands[0] = tmp;
15444 }
15445 else
15446 {
15447 /* Find a DImode register. */
15448 CLEAR_HARD_REG_SET (regset);
15449 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15450 if (tmp != NULL_RTX)
15451 {
15452 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15453 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15454 }
15455 else
15456 {
15457 /* Can we use the input register to form a DI register? */
15458 SET_HARD_REG_SET (regset);
15459 CLEAR_HARD_REG_BIT(regset,
15460 regno % 2 == 0 ? regno + 1 : regno - 1);
15461 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15462 if (tmp == NULL_RTX)
15463 return false;
15464 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15465 }
15466 }
15467
15468 gcc_assert (operands[0] != NULL_RTX);
15469 gcc_assert (operands[1] != NULL_RTX);
15470 gcc_assert (REGNO (operands[0]) % 2 == 0);
15471 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15472 }
15473 }
15474
15475 /* Make sure the instructions are ordered with lower memory access first. */
15476 if (offsets[0] > offsets[1])
15477 {
15478 gap = offsets[0] - offsets[1];
15479 offset = offsets[1];
15480
15481 /* Swap the instructions such that lower memory is accessed first. */
15482 std::swap (operands[0], operands[1]);
15483 std::swap (operands[2], operands[3]);
15484 if (const_store)
15485 std::swap (operands[4], operands[5]);
15486 }
15487 else
15488 {
15489 gap = offsets[1] - offsets[0];
15490 offset = offsets[0];
15491 }
15492
15493 /* Make sure accesses are to consecutive memory locations. */
15494 if (gap != 4)
15495 return false;
15496
15497 /* Make sure we generate legal instructions. */
15498 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15499 false, load))
15500 return true;
15501
15502 /* In Thumb state, where registers are almost unconstrained, there
15503 is little hope to fix it. */
15504 if (TARGET_THUMB2)
15505 return false;
15506
15507 if (load && commute)
15508 {
15509 /* Try reordering registers. */
15510 std::swap (operands[0], operands[1]);
15511 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15512 false, load))
15513 return true;
15514 }
15515
15516 if (const_store)
15517 {
15518 /* If input registers are dead after this pattern, they can be
15519 reordered or replaced by other registers that are free in the
15520 current pattern. */
15521 if (!peep2_reg_dead_p (4, operands[0])
15522 || !peep2_reg_dead_p (4, operands[1]))
15523 return false;
15524
15525 /* Try to reorder the input registers. */
15526 /* For example, the code
15527 mov r0, 0
15528 mov r1, 1
15529 str r1, [r2]
15530 str r0, [r2, #4]
15531 can be transformed into
15532 mov r1, 0
15533 mov r0, 1
15534 strd r0, r1, [r2]
15535 */
15536 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15537 false, false))
15538 {
15539 std::swap (operands[0], operands[1]);
15540 return true;
15541 }
15542
15543 /* Try to find a free DI register. */
15544 CLEAR_HARD_REG_SET (regset);
15545 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15546 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15547 while (true)
15548 {
15549 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15550 if (tmp == NULL_RTX)
15551 return false;
15552
15553 /* DREG must be an even-numbered register in DImode.
15554 Split it into SI registers. */
15555 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15556 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15557 gcc_assert (operands[0] != NULL_RTX);
15558 gcc_assert (operands[1] != NULL_RTX);
15559 gcc_assert (REGNO (operands[0]) % 2 == 0);
15560 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15561
15562 return (operands_ok_ldrd_strd (operands[0], operands[1],
15563 base, offset,
15564 false, load));
15565 }
15566 }
15567
15568 return false;
15569 }
15570
15571
15572
15573 \f
15574 /* Print a symbolic form of X to the debug file, F. */
15575 static void
15576 arm_print_value (FILE *f, rtx x)
15577 {
15578 switch (GET_CODE (x))
15579 {
15580 case CONST_INT:
15581 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15582 return;
15583
15584 case CONST_DOUBLE:
15585 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15586 return;
15587
15588 case CONST_VECTOR:
15589 {
15590 int i;
15591
15592 fprintf (f, "<");
15593 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15594 {
15595 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15596 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15597 fputc (',', f);
15598 }
15599 fprintf (f, ">");
15600 }
15601 return;
15602
15603 case CONST_STRING:
15604 fprintf (f, "\"%s\"", XSTR (x, 0));
15605 return;
15606
15607 case SYMBOL_REF:
15608 fprintf (f, "`%s'", XSTR (x, 0));
15609 return;
15610
15611 case LABEL_REF:
15612 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15613 return;
15614
15615 case CONST:
15616 arm_print_value (f, XEXP (x, 0));
15617 return;
15618
15619 case PLUS:
15620 arm_print_value (f, XEXP (x, 0));
15621 fprintf (f, "+");
15622 arm_print_value (f, XEXP (x, 1));
15623 return;
15624
15625 case PC:
15626 fprintf (f, "pc");
15627 return;
15628
15629 default:
15630 fprintf (f, "????");
15631 return;
15632 }
15633 }
15634 \f
15635 /* Routines for manipulation of the constant pool. */
15636
15637 /* Arm instructions cannot load a large constant directly into a
15638 register; they have to come from a pc relative load. The constant
15639 must therefore be placed in the addressable range of the pc
15640 relative load. Depending on the precise pc relative load
15641 instruction the range is somewhere between 256 bytes and 4k. This
15642 means that we often have to dump a constant inside a function, and
15643 generate code to branch around it.
15644
15645 It is important to minimize this, since the branches will slow
15646 things down and make the code larger.
15647
15648 Normally we can hide the table after an existing unconditional
15649 branch so that there is no interruption of the flow, but in the
15650 worst case the code looks like this:
15651
15652 ldr rn, L1
15653 ...
15654 b L2
15655 align
15656 L1: .long value
15657 L2:
15658 ...
15659
15660 ldr rn, L3
15661 ...
15662 b L4
15663 align
15664 L3: .long value
15665 L4:
15666 ...
15667
15668 We fix this by performing a scan after scheduling, which notices
15669 which instructions need to have their operands fetched from the
15670 constant table and builds the table.
15671
15672 The algorithm starts by building a table of all the constants that
15673 need fixing up and all the natural barriers in the function (places
15674 where a constant table can be dropped without breaking the flow).
15675 For each fixup we note how far the pc-relative replacement will be
15676 able to reach and the offset of the instruction into the function.
15677
15678 Having built the table we then group the fixes together to form
15679 tables that are as large as possible (subject to addressing
15680 constraints) and emit each table of constants after the last
15681 barrier that is within range of all the instructions in the group.
15682 If a group does not contain a barrier, then we forcibly create one
15683 by inserting a jump instruction into the flow. Once the table has
15684 been inserted, the insns are then modified to reference the
15685 relevant entry in the pool.
15686
15687 Possible enhancements to the algorithm (not implemented) are:
15688
15689 1) For some processors and object formats, there may be benefit in
15690 aligning the pools to the start of cache lines; this alignment
15691 would need to be taken into account when calculating addressability
15692 of a pool. */
15693
15694 /* These typedefs are located at the start of this file, so that
15695 they can be used in the prototypes there. This comment is to
15696 remind readers of that fact so that the following structures
15697 can be understood more easily.
15698
15699 typedef struct minipool_node Mnode;
15700 typedef struct minipool_fixup Mfix; */
15701
15702 struct minipool_node
15703 {
15704 /* Doubly linked chain of entries. */
15705 Mnode * next;
15706 Mnode * prev;
15707 /* The maximum offset into the code that this entry can be placed. While
15708 pushing fixes for forward references, all entries are sorted in order
15709 of increasing max_address. */
15710 HOST_WIDE_INT max_address;
15711 /* Similarly for an entry inserted for a backwards ref. */
15712 HOST_WIDE_INT min_address;
15713 /* The number of fixes referencing this entry. This can become zero
15714 if we "unpush" an entry. In this case we ignore the entry when we
15715 come to emit the code. */
15716 int refcount;
15717 /* The offset from the start of the minipool. */
15718 HOST_WIDE_INT offset;
15719 /* The value in table. */
15720 rtx value;
15721 /* The mode of value. */
15722 machine_mode mode;
15723 /* The size of the value. With iWMMXt enabled
15724 sizes > 4 also imply an alignment of 8-bytes. */
15725 int fix_size;
15726 };
15727
15728 struct minipool_fixup
15729 {
15730 Mfix * next;
15731 rtx_insn * insn;
15732 HOST_WIDE_INT address;
15733 rtx * loc;
15734 machine_mode mode;
15735 int fix_size;
15736 rtx value;
15737 Mnode * minipool;
15738 HOST_WIDE_INT forwards;
15739 HOST_WIDE_INT backwards;
15740 };
15741
15742 /* Fixes less than a word need padding out to a word boundary. */
15743 #define MINIPOOL_FIX_SIZE(mode) \
15744 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15745
15746 static Mnode * minipool_vector_head;
15747 static Mnode * minipool_vector_tail;
15748 static rtx_code_label *minipool_vector_label;
15749 static int minipool_pad;
15750
15751 /* The linked list of all minipool fixes required for this function. */
15752 Mfix * minipool_fix_head;
15753 Mfix * minipool_fix_tail;
15754 /* The fix entry for the current minipool, once it has been placed. */
15755 Mfix * minipool_barrier;
15756
15757 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15758 #define JUMP_TABLES_IN_TEXT_SECTION 0
15759 #endif
15760
15761 static HOST_WIDE_INT
15762 get_jump_table_size (rtx_jump_table_data *insn)
15763 {
15764 /* ADDR_VECs only take room if read-only data goes into the text
15765 section. */
15766 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15767 {
15768 rtx body = PATTERN (insn);
15769 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15770 HOST_WIDE_INT size;
15771 HOST_WIDE_INT modesize;
15772
15773 modesize = GET_MODE_SIZE (GET_MODE (body));
15774 size = modesize * XVECLEN (body, elt);
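	 /* For instance (illustration): a 5-entry TBB table (1-byte entries)
	    is 5 bytes, rounded up to 6 below; the same table with 4-byte
	    entries is 20 bytes, plus 2 bytes of padding on Thumb.  */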
15775 switch (modesize)
15776 {
15777 case 1:
15778 /* Round up size of TBB table to a halfword boundary. */
15779 size = (size + 1) & ~HOST_WIDE_INT_1;
15780 break;
15781 case 2:
15782 /* No padding necessary for TBH. */
15783 break;
15784 case 4:
15785 /* Add two bytes for alignment on Thumb. */
15786 if (TARGET_THUMB)
15787 size += 2;
15788 break;
15789 default:
15790 gcc_unreachable ();
15791 }
15792 return size;
15793 }
15794
15795 return 0;
15796 }
15797
15798 /* Return the maximum amount of padding that will be inserted before
15799 label LABEL. */
15800
15801 static HOST_WIDE_INT
15802 get_label_padding (rtx label)
15803 {
15804 HOST_WIDE_INT align, min_insn_size;
15805
15806 align = 1 << label_to_alignment (label);
15807 min_insn_size = TARGET_THUMB ? 2 : 4;
15808 return align > min_insn_size ? align - min_insn_size : 0;
15809 }
15810
15811 /* Move a minipool fix MP from its current location to before MAX_MP.
15812 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15813 constraints may need updating. */
15814 static Mnode *
15815 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15816 HOST_WIDE_INT max_address)
15817 {
15818 /* The code below assumes these are different. */
15819 gcc_assert (mp != max_mp);
15820
15821 if (max_mp == NULL)
15822 {
15823 if (max_address < mp->max_address)
15824 mp->max_address = max_address;
15825 }
15826 else
15827 {
15828 if (max_address > max_mp->max_address - mp->fix_size)
15829 mp->max_address = max_mp->max_address - mp->fix_size;
15830 else
15831 mp->max_address = max_address;
15832
15833 /* Unlink MP from its current position. Since max_mp is non-null,
15834 mp->prev must be non-null. */
15835 mp->prev->next = mp->next;
15836 if (mp->next != NULL)
15837 mp->next->prev = mp->prev;
15838 else
15839 minipool_vector_tail = mp->prev;
15840
15841 /* Re-insert it before MAX_MP. */
15842 mp->next = max_mp;
15843 mp->prev = max_mp->prev;
15844 max_mp->prev = mp;
15845
15846 if (mp->prev != NULL)
15847 mp->prev->next = mp;
15848 else
15849 minipool_vector_head = mp;
15850 }
15851
15852 /* Save the new entry. */
15853 max_mp = mp;
15854
15855 /* Scan over the preceding entries and adjust their addresses as
15856 required. */
15857 while (mp->prev != NULL
15858 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15859 {
15860 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15861 mp = mp->prev;
15862 }
15863
15864 return max_mp;
15865 }
15866
15867 /* Add a constant to the minipool for a forward reference. Returns the
15868 node added or NULL if the constant will not fit in this pool. */
15869 static Mnode *
15870 add_minipool_forward_ref (Mfix *fix)
15871 {
15872 /* If set, max_mp is the first pool_entry that has a lower
15873 constraint than the one we are trying to add. */
15874 Mnode * max_mp = NULL;
15875 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15876 Mnode * mp;
15877
15878 /* If the minipool starts before the end of FIX->INSN then this FIX
15879 can not be placed into the current pool. Furthermore, adding the
15880 new constant pool entry may cause the pool to start FIX_SIZE bytes
15881 earlier. */
15882 if (minipool_vector_head &&
15883 (fix->address + get_attr_length (fix->insn)
15884 >= minipool_vector_head->max_address - fix->fix_size))
15885 return NULL;
15886
15887 /* Scan the pool to see if a constant with the same value has
15888 already been added. While we are doing this, also note the
15889 location where we must insert the constant if it doesn't already
15890 exist. */
15891 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15892 {
15893 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15894 && fix->mode == mp->mode
15895 && (!LABEL_P (fix->value)
15896 || (CODE_LABEL_NUMBER (fix->value)
15897 == CODE_LABEL_NUMBER (mp->value)))
15898 && rtx_equal_p (fix->value, mp->value))
15899 {
15900 /* More than one fix references this entry. */
15901 mp->refcount++;
15902 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15903 }
15904
15905 /* Note the insertion point if necessary. */
15906 if (max_mp == NULL
15907 && mp->max_address > max_address)
15908 max_mp = mp;
15909
15910 /* If we are inserting an 8-byte aligned quantity and
15911 we have not already found an insertion point, then
15912 make sure that all such 8-byte aligned quantities are
15913 placed at the start of the pool. */
15914 if (ARM_DOUBLEWORD_ALIGN
15915 && max_mp == NULL
15916 && fix->fix_size >= 8
15917 && mp->fix_size < 8)
15918 {
15919 max_mp = mp;
15920 max_address = mp->max_address;
15921 }
15922 }
15923
15924 /* The value is not currently in the minipool, so we need to create
15925 a new entry for it. If MAX_MP is NULL, the entry will be put on
15926 the end of the list since the placement is less constrained than
15927 any existing entry. Otherwise, we insert the new fix before
15928 MAX_MP and, if necessary, adjust the constraints on the other
15929 entries. */
15930 mp = XNEW (Mnode);
15931 mp->fix_size = fix->fix_size;
15932 mp->mode = fix->mode;
15933 mp->value = fix->value;
15934 mp->refcount = 1;
15935 /* Not yet required for a backwards ref. */
15936 mp->min_address = -65536;
15937
15938 if (max_mp == NULL)
15939 {
15940 mp->max_address = max_address;
15941 mp->next = NULL;
15942 mp->prev = minipool_vector_tail;
15943
15944 if (mp->prev == NULL)
15945 {
15946 minipool_vector_head = mp;
15947 minipool_vector_label = gen_label_rtx ();
15948 }
15949 else
15950 mp->prev->next = mp;
15951
15952 minipool_vector_tail = mp;
15953 }
15954 else
15955 {
15956 if (max_address > max_mp->max_address - mp->fix_size)
15957 mp->max_address = max_mp->max_address - mp->fix_size;
15958 else
15959 mp->max_address = max_address;
15960
15961 mp->next = max_mp;
15962 mp->prev = max_mp->prev;
15963 max_mp->prev = mp;
15964 if (mp->prev != NULL)
15965 mp->prev->next = mp;
15966 else
15967 minipool_vector_head = mp;
15968 }
15969
15970 /* Save the new entry. */
15971 max_mp = mp;
15972
15973 /* Scan over the preceding entries and adjust their addresses as
15974 required. */
15975 while (mp->prev != NULL
15976 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15977 {
15978 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15979 mp = mp->prev;
15980 }
15981
15982 return max_mp;
15983 }
15984
15985 static Mnode *
15986 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15987 HOST_WIDE_INT min_address)
15988 {
15989 HOST_WIDE_INT offset;
15990
15991 /* The code below assumes these are different. */
15992 gcc_assert (mp != min_mp);
15993
15994 if (min_mp == NULL)
15995 {
15996 if (min_address > mp->min_address)
15997 mp->min_address = min_address;
15998 }
15999 else
16000 {
16001 /* We will adjust this below if it is too loose. */
16002 mp->min_address = min_address;
16003
16004 /* Unlink MP from its current position. Since min_mp is non-null,
16005 mp->next must be non-null. */
16006 mp->next->prev = mp->prev;
16007 if (mp->prev != NULL)
16008 mp->prev->next = mp->next;
16009 else
16010 minipool_vector_head = mp->next;
16011
16012 /* Reinsert it after MIN_MP. */
16013 mp->prev = min_mp;
16014 mp->next = min_mp->next;
16015 min_mp->next = mp;
16016 if (mp->next != NULL)
16017 mp->next->prev = mp;
16018 else
16019 minipool_vector_tail = mp;
16020 }
16021
16022 min_mp = mp;
16023
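/* Recompute every entry's offset and propagate the minimum-address
   constraints forward through the list.  */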
16024 offset = 0;
16025 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16026 {
16027 mp->offset = offset;
16028 if (mp->refcount > 0)
16029 offset += mp->fix_size;
16030
16031 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16032 mp->next->min_address = mp->min_address + mp->fix_size;
16033 }
16034
16035 return min_mp;
16036 }
16037
16038 /* Add a constant to the minipool for a backward reference. Returns the
16039 node added or NULL if the constant will not fit in this pool.
16040
16041 Note that the code for inserting a backwards reference can be
16042 somewhat confusing because the calculated offsets for each fix do
16043 not take into account the size of the pool (which is still under
16044 construction).  */
16045 static Mnode *
16046 add_minipool_backward_ref (Mfix *fix)
16047 {
16048 /* If set, min_mp is the last pool_entry that has a lower constraint
16049 than the one we are trying to add. */
16050 Mnode *min_mp = NULL;
16051 /* This can be negative, since it is only a constraint. */
16052 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16053 Mnode *mp;
16054
16055 /* If we can't reach the current pool from this insn, or if we can't
16056 insert this entry at the end of the pool without pushing other
16057 fixes out of range, then we don't try. This ensures that we
16058 can't fail later on. */
16059 if (min_address >= minipool_barrier->address
16060 || (minipool_vector_tail->min_address + fix->fix_size
16061 >= minipool_barrier->address))
16062 return NULL;
16063
16064 /* Scan the pool to see if a constant with the same value has
16065 already been added. While we are doing this, also note the
16066 location where we must insert the constant if it doesn't already
16067 exist. */
16068 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16069 {
16070 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16071 && fix->mode == mp->mode
16072 && (!LABEL_P (fix->value)
16073 || (CODE_LABEL_NUMBER (fix->value)
16074 == CODE_LABEL_NUMBER (mp->value)))
16075 && rtx_equal_p (fix->value, mp->value)
16076 /* Check that there is enough slack to move this entry to the
16077 end of the table (this is conservative). */
16078 && (mp->max_address
16079 > (minipool_barrier->address
16080 + minipool_vector_tail->offset
16081 + minipool_vector_tail->fix_size)))
16082 {
16083 mp->refcount++;
16084 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16085 }
16086
16087 if (min_mp != NULL)
16088 mp->min_address += fix->fix_size;
16089 else
16090 {
16091 /* Note the insertion point if necessary. */
16092 if (mp->min_address < min_address)
16093 {
16094 /* For now, we do not allow the insertion of 8-byte alignment
16095 requiring nodes anywhere but at the start of the pool. */
16096 if (ARM_DOUBLEWORD_ALIGN
16097 && fix->fix_size >= 8 && mp->fix_size < 8)
16098 return NULL;
16099 else
16100 min_mp = mp;
16101 }
16102 else if (mp->max_address
16103 < minipool_barrier->address + mp->offset + fix->fix_size)
16104 {
16105 /* Inserting before this entry would push the fix beyond
16106 its maximum address (which can happen if we have
16107 re-located a forwards fix); force the new fix to come
16108 after it. */
16109 if (ARM_DOUBLEWORD_ALIGN
16110 && fix->fix_size >= 8 && mp->fix_size < 8)
16111 return NULL;
16112 else
16113 {
16114 min_mp = mp;
16115 min_address = mp->min_address + fix->fix_size;
16116 }
16117 }
16118 /* Do not insert a non-8-byte aligned quantity before 8-byte
16119 aligned quantities. */
16120 else if (ARM_DOUBLEWORD_ALIGN
16121 && fix->fix_size < 8
16122 && mp->fix_size >= 8)
16123 {
16124 min_mp = mp;
16125 min_address = mp->min_address + fix->fix_size;
16126 }
16127 }
16128 }
16129
16130 /* We need to create a new entry. */
16131 mp = XNEW (Mnode);
16132 mp->fix_size = fix->fix_size;
16133 mp->mode = fix->mode;
16134 mp->value = fix->value;
16135 mp->refcount = 1;
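/* Not yet constrained by any forward reference.  */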
16136 mp->max_address = minipool_barrier->address + 65536;
16137
16138 mp->min_address = min_address;
16139
16140 if (min_mp == NULL)
16141 {
16142 mp->prev = NULL;
16143 mp->next = minipool_vector_head;
16144
16145 if (mp->next == NULL)
16146 {
16147 minipool_vector_tail = mp;
16148 minipool_vector_label = gen_label_rtx ();
16149 }
16150 else
16151 mp->next->prev = mp;
16152
16153 minipool_vector_head = mp;
16154 }
16155 else
16156 {
16157 mp->next = min_mp->next;
16158 mp->prev = min_mp;
16159 min_mp->next = mp;
16160
16161 if (mp->next != NULL)
16162 mp->next->prev = mp;
16163 else
16164 minipool_vector_tail = mp;
16165 }
16166
16167 /* Save the new entry. */
16168 min_mp = mp;
16169
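/* Start the scan below from the entry before the new one so that its offset
   gets recomputed too; if the new entry is now the head, its offset is 0.  */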
16170 if (mp->prev)
16171 mp = mp->prev;
16172 else
16173 mp->offset = 0;
16174
16175 /* Scan over the following entries and adjust their offsets. */
16176 while (mp->next != NULL)
16177 {
16178 if (mp->next->min_address < mp->min_address + mp->fix_size)
16179 mp->next->min_address = mp->min_address + mp->fix_size;
16180
16181 if (mp->refcount)
16182 mp->next->offset = mp->offset + mp->fix_size;
16183 else
16184 mp->next->offset = mp->offset;
16185
16186 mp = mp->next;
16187 }
16188
16189 return min_mp;
16190 }
16191
16192 static void
16193 assign_minipool_offsets (Mfix *barrier)
16194 {
16195 HOST_WIDE_INT offset = 0;
16196 Mnode *mp;
16197
16198 minipool_barrier = barrier;
16199
16200 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16201 {
16202 mp->offset = offset;
16203
16204 if (mp->refcount > 0)
16205 offset += mp->fix_size;
16206 }
16207 }
16208
16209 /* Output the literal table.  */
16210 static void
16211 dump_minipool (rtx_insn *scan)
16212 {
16213 Mnode * mp;
16214 Mnode * nmp;
16215 int align64 = 0;
16216
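/* An 8-byte aligned pool is only needed if the target requires doubleword
   alignment and some live entry is at least 8 bytes wide.  */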
16217 if (ARM_DOUBLEWORD_ALIGN)
16218 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16219 if (mp->refcount > 0 && mp->fix_size >= 8)
16220 {
16221 align64 = 1;
16222 break;
16223 }
16224
16225 if (dump_file)
16226 fprintf (dump_file,
16227 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16228 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16229
16230 scan = emit_label_after (gen_label_rtx (), scan);
16231 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16232 scan = emit_label_after (minipool_vector_label, scan);
16233
16234 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16235 {
16236 if (mp->refcount > 0)
16237 {
16238 if (dump_file)
16239 {
16240 fprintf (dump_file,
16241 ";; Offset %u, min %ld, max %ld ",
16242 (unsigned) mp->offset, (unsigned long) mp->min_address,
16243 (unsigned long) mp->max_address);
16244 arm_print_value (dump_file, mp->value);
16245 fputc ('\n', dump_file);
16246 }
16247
16248 rtx val = copy_rtx (mp->value);
16249
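/* Emit the entry using the consttable pattern that matches its size;
   only sizes for which the target defines a pattern can occur here.  */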
16250 switch (GET_MODE_SIZE (mp->mode))
16251 {
16252 #ifdef HAVE_consttable_1
16253 case 1:
16254 scan = emit_insn_after (gen_consttable_1 (val), scan);
16255 break;
16256
16257 #endif
16258 #ifdef HAVE_consttable_2
16259 case 2:
16260 scan = emit_insn_after (gen_consttable_2 (val), scan);
16261 break;
16262
16263 #endif
16264 #ifdef HAVE_consttable_4
16265 case 4:
16266 scan = emit_insn_after (gen_consttable_4 (val), scan);
16267 break;
16268
16269 #endif
16270 #ifdef HAVE_consttable_8
16271 case 8:
16272 scan = emit_insn_after (gen_consttable_8 (val), scan);
16273 break;
16274
16275 #endif
16276 #ifdef HAVE_consttable_16
16277 case 16:
16278 scan = emit_insn_after (gen_consttable_16 (val), scan);
16279 break;
16280
16281 #endif
16282 default:
16283 gcc_unreachable ();
16284 }
16285 }
16286
16287 nmp = mp->next;
16288 free (mp);
16289 }
16290
16291 minipool_vector_head = minipool_vector_tail = NULL;
16292 scan = emit_insn_after (gen_consttable_end (), scan);
16293 scan = emit_barrier_after (scan);
16294 }
16295
16296 /* Return the cost of forcibly inserting a barrier after INSN. */
16297 static int
16298 arm_barrier_cost (rtx_insn *insn)
16299 {
16300 /* Basing the location of the pool on the loop depth is preferable,
16301 but at the moment, the basic block information seems to be
16302 corrupt by this stage of the compilation. */
16303 int base_cost = 50;
16304 rtx_insn *next = next_nonnote_insn (insn);
16305
16306 if (next != NULL && LABEL_P (next))
16307 base_cost -= 20;
16308
16309 switch (GET_CODE (insn))
16310 {
16311 case CODE_LABEL:
16312 /* It will always be better to place the table before the label, rather
16313 than after it. */
16314 return 50;
16315
16316 case INSN:
16317 case CALL_INSN:
16318 return base_cost;
16319
16320 case JUMP_INSN:
16321 return base_cost - 10;
16322
16323 default:
16324 return base_cost + 10;
16325 }
16326 }
16327
16328 /* Find the best place in the insn stream in the range
16329 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16330 Create the barrier by inserting a jump and add a new fix entry for
16331 it. */
16332 static Mfix *
16333 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16334 {
16335 HOST_WIDE_INT count = 0;
16336 rtx_barrier *barrier;
16337 rtx_insn *from = fix->insn;
16338 /* The instruction after which we will insert the jump. */
16339 rtx_insn *selected = NULL;
16340 int selected_cost;
16341 /* The address at which the jump instruction will be placed. */
16342 HOST_WIDE_INT selected_address;
16343 Mfix * new_fix;
16344 HOST_WIDE_INT max_count = max_address - fix->address;
16345 rtx_code_label *label = gen_label_rtx ();
16346
16347 selected_cost = arm_barrier_cost (from);
16348 selected_address = fix->address;
16349
16350 while (from && count < max_count)
16351 {
16352 rtx_jump_table_data *tmp;
16353 int new_cost;
16354
16355 /* This code shouldn't have been called if there was a natural barrier
16356 within range. */
16357 gcc_assert (!BARRIER_P (from));
16358
16359 /* Count the length of this insn. This must stay in sync with the
16360 code that pushes minipool fixes. */
16361 if (LABEL_P (from))
16362 count += get_label_padding (from);
16363 else
16364 count += get_attr_length (from);
16365
16366 /* If there is a jump table, add its length. */
16367 if (tablejump_p (from, NULL, &tmp))
16368 {
16369 count += get_jump_table_size (tmp);
16370
16371 /* Jump tables aren't in a basic block, so base the cost on
16372 the dispatch insn. If we select this location, we will
16373 still put the pool after the table. */
16374 new_cost = arm_barrier_cost (from);
16375
16376 if (count < max_count
16377 && (!selected || new_cost <= selected_cost))
16378 {
16379 selected = tmp;
16380 selected_cost = new_cost;
16381 selected_address = fix->address + count;
16382 }
16383
16384 /* Continue after the dispatch table. */
16385 from = NEXT_INSN (tmp);
16386 continue;
16387 }
16388
16389 new_cost = arm_barrier_cost (from);
16390
16391 if (count < max_count
16392 && (!selected || new_cost <= selected_cost))
16393 {
16394 selected = from;
16395 selected_cost = new_cost;
16396 selected_address = fix->address + count;
16397 }
16398
16399 from = NEXT_INSN (from);
16400 }
16401
16402 /* Make sure that we found a place to insert the jump. */
16403 gcc_assert (selected);
16404
16405 /* Make sure we do not split a call and its corresponding
16406 CALL_ARG_LOCATION note. */
16407 if (CALL_P (selected))
16408 {
16409 rtx_insn *next = NEXT_INSN (selected);
16410 if (next && NOTE_P (next)
16411 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16412 selected = next;
16413 }
16414
16415 /* Create a new JUMP_INSN that branches around a barrier. */
16416 from = emit_jump_insn_after (gen_jump (label), selected);
16417 JUMP_LABEL (from) = label;
16418 barrier = emit_barrier_after (from);
16419 emit_label_after (label, barrier);
16420
16421 /* Create a minipool barrier entry for the new barrier. */
16422 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16423 new_fix->insn = barrier;
16424 new_fix->address = selected_address;
16425 new_fix->next = fix->next;
16426 fix->next = new_fix;
16427
16428 return new_fix;
16429 }
16430
16431 /* Record that there is a natural barrier in the insn stream at
16432 ADDRESS. */
16433 static void
16434 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16435 {
16436 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16437
16438 fix->insn = insn;
16439 fix->address = address;
16440
16441 fix->next = NULL;
16442 if (minipool_fix_head != NULL)
16443 minipool_fix_tail->next = fix;
16444 else
16445 minipool_fix_head = fix;
16446
16447 minipool_fix_tail = fix;
16448 }
16449
16450 /* Record INSN, which will need fixing up to load a value from the
16451 minipool. ADDRESS is the offset of the insn from the start of the
16452 function; LOC is a pointer to the part of the insn which requires
16453 fixing; VALUE is the constant that must be loaded, which is of type
16454 MODE. */
16455 static void
16456 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16457 machine_mode mode, rtx value)
16458 {
16459 gcc_assert (!arm_disable_literal_pool);
16460 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16461
16462 fix->insn = insn;
16463 fix->address = address;
16464 fix->loc = loc;
16465 fix->mode = mode;
16466 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16467 fix->value = value;
16468 fix->forwards = get_attr_pool_range (insn);
16469 fix->backwards = get_attr_neg_pool_range (insn);
16470 fix->minipool = NULL;
16471
16472 /* If an insn doesn't have a range defined for it, then it isn't
16473 expecting to be reworked by this code. Better to stop now than
16474 to generate duff assembly code. */
16475 gcc_assert (fix->forwards || fix->backwards);
16476
16477 /* If an entry requires 8-byte alignment then assume all constant pools
16478 require 4 bytes of padding. Trying to do this later on a per-pool
16479 basis is awkward because existing pool entries have to be modified. */
16480 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16481 minipool_pad = 4;
16482
16483 if (dump_file)
16484 {
16485 fprintf (dump_file,
16486 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16487 GET_MODE_NAME (mode),
16488 INSN_UID (insn), (unsigned long) address,
16489 -1 * (long)fix->backwards, (long)fix->forwards);
16490 arm_print_value (dump_file, fix->value);
16491 fprintf (dump_file, "\n");
16492 }
16493
16494 /* Add it to the chain of fixes. */
16495 fix->next = NULL;
16496
16497 if (minipool_fix_head != NULL)
16498 minipool_fix_tail->next = fix;
16499 else
16500 minipool_fix_head = fix;
16501
16502 minipool_fix_tail = fix;
16503 }
16504
16505 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16506 constant inline; the movdi splitters compare arm_const_double_inline_cost
16507 against this threshold. */
16508 int
16509 arm_max_const_double_inline_cost ()
16510 {
16511 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16512 }
16513
16514 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16515 Returns the number of insns needed, or 99 if we don't know how to
16516 do it. */
16517 int
16518 arm_const_double_inline_cost (rtx val)
16519 {
16520 rtx lowpart, highpart;
16521 machine_mode mode;
16522
16523 mode = GET_MODE (val);
16524
16525 if (mode == VOIDmode)
16526 mode = DImode;
16527
16528 gcc_assert (GET_MODE_SIZE (mode) == 8);
16529
16530 lowpart = gen_lowpart (SImode, val);
16531 highpart = gen_highpart_mode (SImode, mode, val);
16532
16533 gcc_assert (CONST_INT_P (lowpart));
16534 gcc_assert (CONST_INT_P (highpart));
16535
16536 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16537 NULL_RTX, NULL_RTX, 0, 0)
16538 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16539 NULL_RTX, NULL_RTX, 0, 0));
16540 }
16541
16542 /* Cost of loading a SImode constant. */
16543 static inline int
16544 arm_const_inline_cost (enum rtx_code code, rtx val)
16545 {
16546 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16547 NULL_RTX, NULL_RTX, 1, 0);
16548 }
16549
16550 /* Return true if it is worthwhile to split a 64-bit constant into two
16551 32-bit operations. This is the case if optimizing for size, or
16552 if we have load delay slots, or if one 32-bit part can be done with
16553 a single data operation. */
16554 bool
16555 arm_const_double_by_parts (rtx val)
16556 {
16557 machine_mode mode = GET_MODE (val);
16558 rtx part;
16559
16560 if (optimize_size || arm_ld_sched)
16561 return true;
16562
16563 if (mode == VOIDmode)
16564 mode = DImode;
16565
16566 part = gen_highpart_mode (SImode, mode, val);
16567
16568 gcc_assert (CONST_INT_P (part));
16569
16570 if (const_ok_for_arm (INTVAL (part))
16571 || const_ok_for_arm (~INTVAL (part)))
16572 return true;
16573
16574 part = gen_lowpart (SImode, val);
16575
16576 gcc_assert (CONST_INT_P (part));
16577
16578 if (const_ok_for_arm (INTVAL (part))
16579 || const_ok_for_arm (~INTVAL (part)))
16580 return true;
16581
16582 return false;
16583 }
16584
16585 /* Return true if it is possible to inline both the high and low parts
16586 of a 64-bit constant into 32-bit data processing instructions. */
16587 bool
16588 arm_const_double_by_immediates (rtx val)
16589 {
16590 machine_mode mode = GET_MODE (val);
16591 rtx part;
16592
16593 if (mode == VOIDmode)
16594 mode = DImode;
16595
16596 part = gen_highpart_mode (SImode, mode, val);
16597
16598 gcc_assert (CONST_INT_P (part));
16599
16600 if (!const_ok_for_arm (INTVAL (part)))
16601 return false;
16602
16603 part = gen_lowpart (SImode, val);
16604
16605 gcc_assert (CONST_INT_P (part));
16606
16607 if (!const_ok_for_arm (INTVAL (part)))
16608 return false;
16609
16610 return true;
16611 }
16612
16613 /* Scan INSN and note any of its operands that need fixing.
16614 If DO_PUSHES is false we do not actually push any of the fixups
16615 needed. */
16616 static void
16617 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16618 {
16619 int opno;
16620
16621 extract_constrain_insn (insn);
16622
16623 if (recog_data.n_alternatives == 0)
16624 return;
16625
16626 /* Fill in recog_op_alt with information about the constraints of
16627 this insn. */
16628 preprocess_constraints (insn);
16629
16630 const operand_alternative *op_alt = which_op_alt ();
16631 for (opno = 0; opno < recog_data.n_operands; opno++)
16632 {
16633 /* Things we need to fix can only occur in inputs. */
16634 if (recog_data.operand_type[opno] != OP_IN)
16635 continue;
16636
16637 /* If this alternative is a memory reference, then any mention
16638 of constants in this alternative is really to fool reload
16639 into allowing us to accept one there. We need to fix them up
16640 now so that we output the right code. */
16641 if (op_alt[opno].memory_ok)
16642 {
16643 rtx op = recog_data.operand[opno];
16644
16645 if (CONSTANT_P (op))
16646 {
16647 if (do_pushes)
16648 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16649 recog_data.operand_mode[opno], op);
16650 }
16651 else if (MEM_P (op)
16652 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16653 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16654 {
16655 if (do_pushes)
16656 {
16657 rtx cop = avoid_constant_pool_reference (op);
16658
16659 /* Casting the address of something to a mode narrower
16660 than a word can cause avoid_constant_pool_reference()
16661 to return the pool reference itself. That's no good to
16662 us here. Let's just hope that we can use the
16663 constant pool value directly. */
16664 if (op == cop)
16665 cop = get_pool_constant (XEXP (op, 0));
16666
16667 push_minipool_fix (insn, address,
16668 recog_data.operand_loc[opno],
16669 recog_data.operand_mode[opno], cop);
16670 }
16671
16672 }
16673 }
16674 }
16675
16676 return;
16677 }
16678
16679 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16680 and unions in the context of ARMv8-M Security Extensions. It is used as a
16681 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16682 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16683 or four masks, depending on whether it is being computed for a
16684 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16685 respectively. The tree for the type of the argument or a field within an
16686 argument is passed in ARG_TYPE, the current register this argument or field
16687 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16688 argument or field starts at is passed in STARTING_BIT and the last used bit
16689 is kept in LAST_USED_BIT which is also updated accordingly. */
16690
16691 static unsigned HOST_WIDE_INT
16692 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16693 uint32_t * padding_bits_to_clear,
16694 unsigned starting_bit, int * last_used_bit)
16695
16696 {
16697 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16698
16699 if (TREE_CODE (arg_type) == RECORD_TYPE)
16700 {
16701 unsigned current_bit = starting_bit;
16702 tree field;
16703 long int offset, size;
16704
16705
16706 field = TYPE_FIELDS (arg_type);
16707 while (field)
16708 {
16709 /* The offset within a structure is always an offset from
16710 the start of that structure. Make sure we take that into account
16711 in the calculation of the register-based offset used here. */
16712 offset = starting_bit;
16713 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16714 offset %= 32;
16715
16716 /* This is the actual size of the field; for bitfields this is the
16717 bitfield width and not the container size. */
16718 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16719
16720 if (*last_used_bit != offset)
16721 {
16722 if (offset < *last_used_bit)
16723 {
16724 /* This field's offset is before the 'last_used_bit', that
16725 means this field goes on the next register. So we need to
16726 pad the rest of the current register and increase the
16727 register number. */
16728 uint32_t mask;
16729 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16730 mask++;
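/* MASK now has every bit from *LAST_USED_BIT upwards set.  */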
16731
16732 padding_bits_to_clear[*regno] |= mask;
16733 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16734 (*regno)++;
16735 }
16736 else
16737 {
16738 /* Otherwise we pad the bits between the last field's end and
16739 the start of the new field. */
16740 uint32_t mask;
16741
16742 mask = ((uint32_t)-1) >> (32 - offset);
16743 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
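/* MASK covers bits *LAST_USED_BIT .. OFFSET - 1, i.e. the padding between
   the previous field's end and the start of this field.  */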
16744 padding_bits_to_clear[*regno] |= mask;
16745 }
16746 current_bit = offset;
16747 }
16748
16749 /* Calculate further padding bits for inner structs/unions too. */
16750 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16751 {
16752 *last_used_bit = current_bit;
16753 not_to_clear_reg_mask
16754 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16755 padding_bits_to_clear, offset,
16756 last_used_bit);
16757 }
16758 else
16759 {
16760 /* Update 'current_bit' with this field's size. If the
16761 'current_bit' lies in a subsequent register, update 'regno' and
16762 reset 'current_bit' to point to the current bit in that new
16763 register. */
16764 current_bit += size;
16765 while (current_bit >= 32)
16766 {
16767 current_bit -= 32;
16768 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16769 (*regno)++;
16770 }
16771 *last_used_bit = current_bit;
16772 }
16773
16774 field = TREE_CHAIN (field);
16775 }
16776 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16777 }
16778 else if (TREE_CODE (arg_type) == UNION_TYPE)
16779 {
16780 tree field, field_t;
16781 int i, regno_t, field_size;
16782 int max_reg = -1;
16783 int max_bit = -1;
16784 uint32_t mask;
16785 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16786 = {-1, -1, -1, -1};
16787
16788 /* To compute the padding bits in a union we only consider bits as
16789 padding bits if they are always either a padding bit or fall outside a
16790 field's size for all fields in the union. */
16791 field = TYPE_FIELDS (arg_type);
16792 while (field)
16793 {
16794 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16795 = {0U, 0U, 0U, 0U};
16796 int last_used_bit_t = *last_used_bit;
16797 regno_t = *regno;
16798 field_t = TREE_TYPE (field);
16799
16800 /* If the field's type is either a record or a union make sure to
16801 compute their padding bits too. */
16802 if (RECORD_OR_UNION_TYPE_P (field_t))
16803 not_to_clear_reg_mask
16804 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16805 &padding_bits_to_clear_t[0],
16806 starting_bit, &last_used_bit_t);
16807 else
16808 {
16809 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16810 regno_t = (field_size / 32) + *regno;
16811 last_used_bit_t = (starting_bit + field_size) % 32;
16812 }
16813
16814 for (i = *regno; i < regno_t; i++)
16815 {
16816 /* For all but the last register used by this field only keep the
16817 padding bits that were padding bits in this field. */
16818 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16819 }
16820
16821 /* For the last register, keep all padding bits that were padding
16822 bits in this field and any padding bits that are still valid
16823 as padding bits but fall outside of this field's size. */
16824 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16825 padding_bits_to_clear_res[regno_t]
16826 &= padding_bits_to_clear_t[regno_t] | mask;
16827
16828 /* Update the maximum size of the fields in terms of registers used
16829 ('max_reg') and the 'last_used_bit' in said register. */
16830 if (max_reg < regno_t)
16831 {
16832 max_reg = regno_t;
16833 max_bit = last_used_bit_t;
16834 }
16835 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16836 max_bit = last_used_bit_t;
16837
16838 field = TREE_CHAIN (field);
16839 }
16840
16841 /* Update the current padding_bits_to_clear using the intersection of the
16842 padding bits of all the fields. */
16843 for (i = *regno; i < max_reg; i++)
16844 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16845
16846 /* Do not keep trailing padding bits; we do not know yet whether this
16847 is the end of the argument. */
16848 mask = ((uint32_t) 1 << max_bit) - 1;
16849 padding_bits_to_clear[max_reg]
16850 |= padding_bits_to_clear_res[max_reg] & mask;
16851
16852 *regno = max_reg;
16853 *last_used_bit = max_bit;
16854 }
16855 else
16856 /* This function should only be used for structs and unions. */
16857 gcc_unreachable ();
16858
16859 return not_to_clear_reg_mask;
16860 }
16861
16862 /* In the context of ARMv8-M Security Extensions, this function is used for both
16863 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16864 registers are used when returning or passing arguments, which is then
16865 returned as a mask. It will also compute a mask to indicate padding/unused
16866 bits for each of these registers, and passes this through the
16867 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16868 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16869 the starting register used to pass this argument or return value is passed
16870 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16871 for struct and union types. */
16872
16873 static unsigned HOST_WIDE_INT
16874 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16875 uint32_t * padding_bits_to_clear)
16876
16877 {
16878 int last_used_bit = 0;
16879 unsigned HOST_WIDE_INT not_to_clear_mask;
16880
16881 if (RECORD_OR_UNION_TYPE_P (arg_type))
16882 {
16883 not_to_clear_mask
16884 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16885 padding_bits_to_clear, 0,
16886 &last_used_bit);
16887
16888
16889 /* If the 'last_used_bit' is not zero, that means we are still using a
16890 part of the last 'regno'. In such cases we must clear the trailing
16891 bits. Otherwise regno is not used at all and should be marked as a
16892 register to clear. */
16893 if (last_used_bit != 0)
16894 padding_bits_to_clear[regno]
16895 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16896 else
16897 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16898 }
16899 else
16900 {
16901 not_to_clear_mask = 0;
16902 /* We are not dealing with structs or unions, so these arguments may be
16903 passed in floating point registers too. In some cases a BLKmode is
16904 used when returning or passing arguments in multiple VFP registers. */
16905 if (GET_MODE (arg_rtx) == BLKmode)
16906 {
16907 int i, arg_regs;
16908 rtx reg;
16909
16910 /* This should really only occur when dealing with the hard-float
16911 ABI. */
16912 gcc_assert (TARGET_HARD_FLOAT_ABI);
16913
16914 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16915 {
16916 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16917 gcc_assert (REG_P (reg));
16918
16919 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16920
16921 /* If we are dealing with DF mode, make sure we don't
16922 clear either of the registers it addresses. */
16923 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16924 if (arg_regs > 1)
16925 {
16926 unsigned HOST_WIDE_INT mask;
16927 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16928 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16929 not_to_clear_mask |= mask;
16930 }
16931 }
16932 }
16933 else
16934 {
16935 /* Otherwise we can rely on the MODE to determine how many registers
16936 are being used by this argument. */
16937 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16938 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16939 if (arg_regs > 1)
16940 {
16941 unsigned HOST_WIDE_INT
16942 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16943 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16944 not_to_clear_mask |= mask;
16945 }
16946 }
16947 }
16948
16949 return not_to_clear_mask;
16950 }
16951
16952 /* Clears caller saved registers not used to pass arguments before a
16953 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16954 registers is done in __gnu_cmse_nonsecure_call libcall.
16955 See libgcc/config/arm/cmse_nonsecure_call.S. */
16956
16957 static void
16958 cmse_nonsecure_call_clear_caller_saved (void)
16959 {
16960 basic_block bb;
16961
16962 FOR_EACH_BB_FN (bb, cfun)
16963 {
16964 rtx_insn *insn;
16965
16966 FOR_BB_INSNS (bb, insn)
16967 {
16968 uint64_t to_clear_mask, float_mask;
16969 rtx_insn *seq;
16970 rtx pat, call, unspec, reg, cleared_reg, tmp;
16971 unsigned int regno, maxregno;
16972 rtx address;
16973 CUMULATIVE_ARGS args_so_far_v;
16974 cumulative_args_t args_so_far;
16975 tree arg_type, fntype;
16976 bool using_r4, first_param = true;
16977 function_args_iterator args_iter;
16978 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16979 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16980
16981 if (!NONDEBUG_INSN_P (insn))
16982 continue;
16983
16984 if (!CALL_P (insn))
16985 continue;
16986
16987 pat = PATTERN (insn);
16988 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16989 call = XVECEXP (pat, 0, 0);
16990
16991 /* Get the real call RTX if the insn sets a value, ie. returns. */
16992 if (GET_CODE (call) == SET)
16993 call = SET_SRC (call);
16994
16995 /* Check if it is a cmse_nonsecure_call. */
16996 unspec = XEXP (call, 0);
16997 if (GET_CODE (unspec) != UNSPEC
16998 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16999 continue;
17000
17001 /* Determine the caller-saved registers we need to clear. */
17002 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17003 maxregno = NUM_ARG_REGS - 1;
17004 /* Only look at the caller-saved floating point registers in case of
17005 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17006 lazy store and loads which clear both caller- and callee-saved
17007 registers. */
17008 if (TARGET_HARD_FLOAT_ABI)
17009 {
17010 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17011 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17012 to_clear_mask |= float_mask;
17013 maxregno = D7_VFP_REGNUM;
17014 }
17015
17016 /* Make sure the register used to hold the function address is not
17017 cleared. */
17018 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17019 gcc_assert (MEM_P (address));
17020 gcc_assert (REG_P (XEXP (address, 0)));
17021 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17022
17023 /* Set basic block of call insn so that df rescan is performed on
17024 insns inserted here. */
17025 set_block_for_insn (insn, bb);
17026 df_set_flags (DF_DEFER_INSN_RESCAN);
17027 start_sequence ();
17028
17029 /* Make sure the scheduler doesn't schedule other insns beyond
17030 here. */
17031 emit_insn (gen_blockage ());
17032
17033 /* Walk through all arguments and clear registers appropriately.  */
17034
17035 fntype = TREE_TYPE (MEM_EXPR (address));
17036 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17037 NULL_TREE);
17038 args_so_far = pack_cumulative_args (&args_so_far_v);
17039 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17040 {
17041 rtx arg_rtx;
17042 machine_mode arg_mode = TYPE_MODE (arg_type);
17043
17044 if (VOID_TYPE_P (arg_type))
17045 continue;
17046
17047 if (!first_param)
17048 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17049 true);
17050
17051 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17052 true);
17053 gcc_assert (REG_P (arg_rtx));
17054 to_clear_mask
17055 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17056 REGNO (arg_rtx),
17057 padding_bits_to_clear_ptr);
17058
17059 first_param = false;
17060 }
17061
17062 /* Clear padding bits where needed. */
17063 cleared_reg = XEXP (address, 0);
17064 reg = gen_rtx_REG (SImode, IP_REGNUM);
17065 using_r4 = false;
17066 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17067 {
17068 if (padding_bits_to_clear[regno] == 0)
17069 continue;
17070
17071 /* If this is a Thumb-1 target copy the address of the function
17072 we are calling from 'r4' into 'ip' such that we can use r4 to
17073 clear the unused bits in the arguments. */
17074 if (TARGET_THUMB1 && !using_r4)
17075 {
17076 using_r4 = true;
17077 reg = cleared_reg;
17078 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17079 reg);
17080 }
17081
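/* Materialize the inverted padding mask in REG sixteen bits at a time:
   the low half with a plain move, the high half (if non-zero) below via
   a ZERO_EXTRACT set.  */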
17082 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17083 emit_move_insn (reg, tmp);
17084 /* Also fill the top half of the negated
17085 padding_bits_to_clear. */
17086 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17087 {
17088 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17089 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17090 GEN_INT (16),
17091 GEN_INT (16)),
17092 tmp));
17093 }
17094
17095 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17096 gen_rtx_REG (SImode, regno),
17097 reg));
17098
17099 }
17100 if (using_r4)
17101 emit_move_insn (cleared_reg,
17102 gen_rtx_REG (SImode, IP_REGNUM));
17103
17104 /* We use right shift and left shift to clear the LSB of the address
17105 we jump to instead of using bic, to avoid having to use an extra
17106 register on Thumb-1. */
17107 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17108 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17109 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17110 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17111
17112 /* Clear all registers that might leak values before doing the
17113 non-secure call. */
17114 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17115 {
17116 if (!(to_clear_mask & (1LL << regno)))
17117 continue;
17118
17119 /* If regno is an even vfp register and its successor is also to
17120 be cleared, use vmov. */
17121 if (IS_VFP_REGNUM (regno))
17122 {
17123 if (TARGET_VFP_DOUBLE
17124 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17125 && to_clear_mask & (1LL << (regno + 1)))
17126 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17127 CONST0_RTX (DFmode));
17128 else
17129 emit_move_insn (gen_rtx_REG (SFmode, regno),
17130 CONST0_RTX (SFmode));
17131 }
17132 else
17133 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17134 }
17135
17136 seq = get_insns ();
17137 end_sequence ();
17138 emit_insn_before (seq, insn);
17139
17140 }
17141 }
17142 }
17143
17144 /* Rewrite move insn into subtract of 0 if the condition codes will
17145 be useful in the next conditional jump insn. */
17146
17147 static void
17148 thumb1_reorg (void)
17149 {
17150 basic_block bb;
17151
17152 FOR_EACH_BB_FN (bb, cfun)
17153 {
17154 rtx dest, src;
17155 rtx cmp, op0, op1, set = NULL;
17156 rtx_insn *prev, *insn = BB_END (bb);
17157 bool insn_clobbered = false;
17158
17159 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17160 insn = PREV_INSN (insn);
17161
17162 /* Find the last cbranchsi4_insn in basic block BB. */
17163 if (insn == BB_HEAD (bb)
17164 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17165 continue;
17166
17167 /* Get the register with which we are comparing. */
17168 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17169 op0 = XEXP (cmp, 0);
17170 op1 = XEXP (cmp, 1);
17171
17172 /* Check that comparison is against ZERO. */
17173 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17174 continue;
17175
17176 /* Find the first flag setting insn before INSN in basic block BB. */
17177 gcc_assert (insn != BB_HEAD (bb));
17178 for (prev = PREV_INSN (insn);
17179 (!insn_clobbered
17180 && prev != BB_HEAD (bb)
17181 && (NOTE_P (prev)
17182 || DEBUG_INSN_P (prev)
17183 || ((set = single_set (prev)) != NULL
17184 && get_attr_conds (prev) == CONDS_NOCOND)));
17185 prev = PREV_INSN (prev))
17186 {
17187 if (reg_set_p (op0, prev))
17188 insn_clobbered = true;
17189 }
17190
17191 /* Skip if op0 is clobbered by an insn other than prev. */
17192 if (insn_clobbered)
17193 continue;
17194
17195 if (!set)
17196 continue;
17197
17198 dest = SET_DEST (set);
17199 src = SET_SRC (set);
17200 if (!low_register_operand (dest, SImode)
17201 || !low_register_operand (src, SImode))
17202 continue;
17203
17204 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17205 in INSN. Both src and dest of the move insn are checked. */
17206 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17207 {
17208 dest = copy_rtx (dest);
17209 src = copy_rtx (src);
17210 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17211 PATTERN (prev) = gen_rtx_SET (dest, src);
17212 INSN_CODE (prev) = -1;
17213 /* Set test register in INSN to dest. */
17214 XEXP (cmp, 0) = copy_rtx (dest);
17215 INSN_CODE (insn) = -1;
17216 }
17217 }
17218 }
17219
17220 /* Convert instructions to their cc-clobbering variant if possible, since
17221 that allows us to use smaller encodings. */
17222
17223 static void
17224 thumb2_reorg (void)
17225 {
17226 basic_block bb;
17227 regset_head live;
17228
17229 INIT_REG_SET (&live);
17230
17231 /* We are freeing block_for_insn in the toplev to keep compatibility
17232 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17233 compute_bb_for_insn ();
17234 df_analyze ();
17235
17236 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17237
17238 FOR_EACH_BB_FN (bb, cfun)
17239 {
17240 if ((current_tune->disparage_flag_setting_t16_encodings
17241 == tune_params::DISPARAGE_FLAGS_ALL)
17242 && optimize_bb_for_speed_p (bb))
17243 continue;
17244
17245 rtx_insn *insn;
17246 Convert_Action action = SKIP;
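/* Instructions that only partially set the flags (logical operations,
   shifts and moves) are converted only when the tuning does not disparage
   flag-setting 16-bit encodings at all, or when this block is being
   optimized for size.  */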
17247 Convert_Action action_for_partial_flag_setting
17248 = ((current_tune->disparage_flag_setting_t16_encodings
17249 != tune_params::DISPARAGE_FLAGS_NEITHER)
17250 && optimize_bb_for_speed_p (bb))
17251 ? SKIP : CONV;
17252
17253 COPY_REG_SET (&live, DF_LR_OUT (bb));
17254 df_simulate_initialize_backwards (bb, &live);
17255 FOR_BB_INSNS_REVERSE (bb, insn)
17256 {
17257 if (NONJUMP_INSN_P (insn)
17258 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17259 && GET_CODE (PATTERN (insn)) == SET)
17260 {
17261 action = SKIP;
17262 rtx pat = PATTERN (insn);
17263 rtx dst = XEXP (pat, 0);
17264 rtx src = XEXP (pat, 1);
17265 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17266
17267 if (UNARY_P (src) || BINARY_P (src))
17268 op0 = XEXP (src, 0);
17269
17270 if (BINARY_P (src))
17271 op1 = XEXP (src, 1);
17272
17273 if (low_register_operand (dst, SImode))
17274 {
17275 switch (GET_CODE (src))
17276 {
17277 case PLUS:
17278 /* Adding two registers and storing the result
17279 in the first source is already a 16-bit
17280 operation. */
17281 if (rtx_equal_p (dst, op0)
17282 && register_operand (op1, SImode))
17283 break;
17284
17285 if (low_register_operand (op0, SImode))
17286 {
17287 /* ADDS <Rd>,<Rn>,<Rm> */
17288 if (low_register_operand (op1, SImode))
17289 action = CONV;
17290 /* ADDS <Rdn>,#<imm8> */
17291 /* SUBS <Rdn>,#<imm8> */
17292 else if (rtx_equal_p (dst, op0)
17293 && CONST_INT_P (op1)
17294 && IN_RANGE (INTVAL (op1), -255, 255))
17295 action = CONV;
17296 /* ADDS <Rd>,<Rn>,#<imm3> */
17297 /* SUBS <Rd>,<Rn>,#<imm3> */
17298 else if (CONST_INT_P (op1)
17299 && IN_RANGE (INTVAL (op1), -7, 7))
17300 action = CONV;
17301 }
17302 /* ADCS <Rd>, <Rn> */
17303 else if (GET_CODE (XEXP (src, 0)) == PLUS
17304 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17305 && low_register_operand (XEXP (XEXP (src, 0), 1),
17306 SImode)
17307 && COMPARISON_P (op1)
17308 && cc_register (XEXP (op1, 0), VOIDmode)
17309 && maybe_get_arm_condition_code (op1) == ARM_CS
17310 && XEXP (op1, 1) == const0_rtx)
17311 action = CONV;
17312 break;
17313
17314 case MINUS:
17315 /* RSBS <Rd>,<Rn>,#0
17316 Not handled here: see NEG below. */
17317 /* SUBS <Rd>,<Rn>,#<imm3>
17318 SUBS <Rdn>,#<imm8>
17319 Not handled here: see PLUS above. */
17320 /* SUBS <Rd>,<Rn>,<Rm> */
17321 if (low_register_operand (op0, SImode)
17322 && low_register_operand (op1, SImode))
17323 action = CONV;
17324 break;
17325
17326 case MULT:
17327 /* MULS <Rdm>,<Rn>,<Rdm>
17328 As an exception to the rule, this is only used
17329 when optimizing for size since MULS is slow on all
17330 known implementations. We do not even want to use
17331 MULS in cold code, if optimizing for speed, so we
17332 test the global flag here. */
17333 if (!optimize_size)
17334 break;
17335 /* Fall through. */
17336 case AND:
17337 case IOR:
17338 case XOR:
17339 /* ANDS <Rdn>,<Rm> */
17340 if (rtx_equal_p (dst, op0)
17341 && low_register_operand (op1, SImode))
17342 action = action_for_partial_flag_setting;
17343 else if (rtx_equal_p (dst, op1)
17344 && low_register_operand (op0, SImode))
17345 action = action_for_partial_flag_setting == SKIP
17346 ? SKIP : SWAP_CONV;
17347 break;
17348
17349 case ASHIFTRT:
17350 case ASHIFT:
17351 case LSHIFTRT:
17352 /* ASRS <Rdn>,<Rm> */
17353 /* LSRS <Rdn>,<Rm> */
17354 /* LSLS <Rdn>,<Rm> */
17355 if (rtx_equal_p (dst, op0)
17356 && low_register_operand (op1, SImode))
17357 action = action_for_partial_flag_setting;
17358 /* ASRS <Rd>,<Rm>,#<imm5> */
17359 /* LSRS <Rd>,<Rm>,#<imm5> */
17360 /* LSLS <Rd>,<Rm>,#<imm5> */
17361 else if (low_register_operand (op0, SImode)
17362 && CONST_INT_P (op1)
17363 && IN_RANGE (INTVAL (op1), 0, 31))
17364 action = action_for_partial_flag_setting;
17365 break;
17366
17367 case ROTATERT:
17368 /* RORS <Rdn>,<Rm> */
17369 if (rtx_equal_p (dst, op0)
17370 && low_register_operand (op1, SImode))
17371 action = action_for_partial_flag_setting;
17372 break;
17373
17374 case NOT:
17375 /* MVNS <Rd>,<Rm> */
17376 if (low_register_operand (op0, SImode))
17377 action = action_for_partial_flag_setting;
17378 break;
17379
17380 case NEG:
17381 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17382 if (low_register_operand (op0, SImode))
17383 action = CONV;
17384 break;
17385
17386 case CONST_INT:
17387 /* MOVS <Rd>,#<imm8> */
17388 if (CONST_INT_P (src)
17389 && IN_RANGE (INTVAL (src), 0, 255))
17390 action = action_for_partial_flag_setting;
17391 break;
17392
17393 case REG:
17394 /* MOVS and MOV<c> with registers have different
17395 encodings, so are not relevant here. */
17396 break;
17397
17398 default:
17399 break;
17400 }
17401 }
17402
17403 if (action != SKIP)
17404 {
17405 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17406 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17407 rtvec vec;
17408
17409 if (action == SWAP_CONV)
17410 {
17411 src = copy_rtx (src);
17412 XEXP (src, 0) = op1;
17413 XEXP (src, 1) = op0;
17414 pat = gen_rtx_SET (dst, src);
17415 vec = gen_rtvec (2, pat, clobber);
17416 }
17417 else /* action == CONV */
17418 vec = gen_rtvec (2, pat, clobber);
17419
17420 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17421 INSN_CODE (insn) = -1;
17422 }
17423 }
17424
17425 if (NONDEBUG_INSN_P (insn))
17426 df_simulate_one_insn_backwards (bb, insn, &live);
17427 }
17428 }
17429
17430 CLEAR_REG_SET (&live);
17431 }
17432
17433 /* GCC puts the pool in the wrong place for ARM, since we can only
17434 load addresses within a limited distance of the PC. We do some
17435 special munging to move the constant pool values to the correct
17436 point in the code. */
17437 static void
17438 arm_reorg (void)
17439 {
17440 rtx_insn *insn;
17441 HOST_WIDE_INT address = 0;
17442 Mfix * fix;
17443
17444 if (use_cmse)
17445 cmse_nonsecure_call_clear_caller_saved ();
17446 if (TARGET_THUMB1)
17447 thumb1_reorg ();
17448 else if (TARGET_THUMB2)
17449 thumb2_reorg ();
17450
17451 /* Ensure all insns that must be split have been split at this point.
17452 Otherwise, the pool placement code below may compute incorrect
17453 insn lengths. Note that when optimizing, all insns have already
17454 been split at this point. */
17455 if (!optimize)
17456 split_all_insns_noflow ();
17457
17458 /* When the literal pool is disabled it should never be necessary to
17459 create one, so do not even attempt it. */
17460 if (arm_disable_literal_pool)
17461 return;
17462
17463 minipool_fix_head = minipool_fix_tail = NULL;
17464
17465 /* The first insn must always be a note, or the code below won't
17466 scan it properly. */
17467 insn = get_insns ();
17468 gcc_assert (NOTE_P (insn));
17469 minipool_pad = 0;
17470
17471 /* Scan all the insns and record the operands that will need fixing. */
17472 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17473 {
17474 if (BARRIER_P (insn))
17475 push_minipool_barrier (insn, address);
17476 else if (INSN_P (insn))
17477 {
17478 rtx_jump_table_data *table;
17479
17480 note_invalid_constants (insn, address, true);
17481 address += get_attr_length (insn);
17482
17483 /* If the insn is a vector jump, add the size of the table
17484 and skip the table. */
17485 if (tablejump_p (insn, NULL, &table))
17486 {
17487 address += get_jump_table_size (table);
17488 insn = table;
17489 }
17490 }
17491 else if (LABEL_P (insn))
17492 /* Add the worst-case padding due to alignment. We don't add
17493 the _current_ padding because the minipool insertions
17494 themselves might change it. */
17495 address += get_label_padding (insn);
17496 }
17497
17498 fix = minipool_fix_head;
17499
17500 /* Now scan the fixups and perform the required changes. */
17501 while (fix)
17502 {
17503 Mfix * ftmp;
17504 Mfix * fdel;
17505 Mfix * last_added_fix;
17506 Mfix * last_barrier = NULL;
17507 Mfix * this_fix;
17508
17509 /* Skip any further barriers before the next fix. */
17510 while (fix && BARRIER_P (fix->insn))
17511 fix = fix->next;
17512
17513 /* No more fixes. */
17514 if (fix == NULL)
17515 break;
17516
17517 last_added_fix = NULL;
17518
17519 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17520 {
17521 if (BARRIER_P (ftmp->insn))
17522 {
17523 if (ftmp->address >= minipool_vector_head->max_address)
17524 break;
17525
17526 last_barrier = ftmp;
17527 }
17528 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17529 break;
17530
17531 last_added_fix = ftmp; /* Keep track of the last fix added. */
17532 }
17533
17534 /* If we found a barrier, drop back to that; any fixes that we
17535 could have reached but come after the barrier will now go in
17536 the next mini-pool. */
17537 if (last_barrier != NULL)
17538 {
17539 /* Reduce the refcount for those fixes that won't go into this
17540 pool after all. */
17541 for (fdel = last_barrier->next;
17542 fdel && fdel != ftmp;
17543 fdel = fdel->next)
17544 {
17545 fdel->minipool->refcount--;
17546 fdel->minipool = NULL;
17547 }
17548
17549 ftmp = last_barrier;
17550 }
17551 else
17552 {
17553 /* ftmp is the first fix that we can't fit into this pool and
17554 there are no natural barriers that we could use. Insert a
17555 new barrier in the code somewhere between the previous
17556 fix and this one, and arrange to jump around it. */
17557 HOST_WIDE_INT max_address;
17558
17559 /* The last item on the list of fixes must be a barrier, so
17560 we can never run off the end of the list of fixes without
17561 last_barrier being set. */
17562 gcc_assert (ftmp);
17563
17564 max_address = minipool_vector_head->max_address;
17565 /* Check that there isn't another fix that is in range that
17566 we couldn't fit into this pool because the pool was
17567 already too large: we need to put the pool before such an
17568 instruction. The pool itself may come just after the
17569 fix because create_fix_barrier also allows space for a
17570 jump instruction. */
17571 if (ftmp->address < max_address)
17572 max_address = ftmp->address + 1;
17573
17574 last_barrier = create_fix_barrier (last_added_fix, max_address);
17575 }
17576
17577 assign_minipool_offsets (last_barrier);
17578
17579 while (ftmp)
17580 {
17581 if (!BARRIER_P (ftmp->insn)
17582 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17583 == NULL))
17584 break;
17585
17586 ftmp = ftmp->next;
17587 }
17588
17589 /* Scan over the fixes we have identified for this pool, fixing them
17590 up and adding the constants to the pool itself. */
17591 for (this_fix = fix; this_fix && ftmp != this_fix;
17592 this_fix = this_fix->next)
17593 if (!BARRIER_P (this_fix->insn))
17594 {
17595 rtx addr
17596 = plus_constant (Pmode,
17597 gen_rtx_LABEL_REF (VOIDmode,
17598 minipool_vector_label),
17599 this_fix->minipool->offset);
17600 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17601 }
17602
17603 dump_minipool (last_barrier->insn);
17604 fix = ftmp;
17605 }
17606
17607 /* From now on we must synthesize any constants that we can't handle
17608 directly. This can happen if the RTL gets split during final
17609 instruction generation. */
17610 cfun->machine->after_arm_reorg = 1;
17611
17612 /* Free the minipool memory. */
17613 obstack_free (&minipool_obstack, minipool_startobj);
17614 }
17615 \f
17616 /* Routines to output assembly language. */
17617
17618 /* Return the string representation of the passed-in real value. */
17619 static const char *
17620 fp_const_from_val (REAL_VALUE_TYPE *r)
17621 {
17622 if (!fp_consts_inited)
17623 init_fp_table ();
17624
17625 gcc_assert (real_equal (r, &value_fp0));
17626 return "0";
17627 }
17628
17629 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17630 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17631 insn is in the list, and UPDATE is true iff the list contains an
17632 explicit update of the base register. */
17633 void
17634 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17635 bool update)
17636 {
17637 int i;
17638 char pattern[100];
17639 int offset;
17640 const char *conditional;
17641 int num_saves = XVECLEN (operands[0], 0);
17642 unsigned int regno;
17643 unsigned int regno_base = REGNO (operands[1]);
17644 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17645
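/* The first OFFSET elements of the PARALLEL are the optional base-register
   update and the return, not destination-register loads; skip over them.  */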
17646 offset = 0;
17647 offset += update ? 1 : 0;
17648 offset += return_pc ? 1 : 0;
17649
17650 /* Is the base register in the list? */
17651 for (i = offset; i < num_saves; i++)
17652 {
17653 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17654 /* If SP is in the list, then the base register must be SP. */
17655 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17656 /* If base register is in the list, there must be no explicit update. */
17657 if (regno == regno_base)
17658 gcc_assert (!update);
17659 }
17660
17661 conditional = reverse ? "%?%D0" : "%?%d0";
17662 /* Can't use POP if returning from an interrupt. */
17663 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17664 sprintf (pattern, "pop%s\t{", conditional);
17665 else
17666 {
17667 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17668 It's just a convention, their semantics are identical. */
17669 if (regno_base == SP_REGNUM)
17670 sprintf (pattern, "ldmfd%s\t", conditional);
17671 else if (update)
17672 sprintf (pattern, "ldmia%s\t", conditional);
17673 else
17674 sprintf (pattern, "ldm%s\t", conditional);
17675
17676 strcat (pattern, reg_names[regno_base]);
17677 if (update)
17678 strcat (pattern, "!, {");
17679 else
17680 strcat (pattern, ", {");
17681 }
17682
17683 /* Output the first destination register. */
17684 strcat (pattern,
17685 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17686
17687 /* Output the rest of the destination registers. */
17688 for (i = offset + 1; i < num_saves; i++)
17689 {
17690 strcat (pattern, ", ");
17691 strcat (pattern,
17692 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17693 }
17694
17695 strcat (pattern, "}");
17696
17697 if (interrupt_p && return_pc)
17698 strcat (pattern, "^");
17699
17700 output_asm_insn (pattern, &cond);
17701 }
17702
17703
17704 /* Output the assembly for a store multiple. */
17705
17706 const char *
17707 vfp_output_vstmd (rtx * operands)
17708 {
17709 char pattern[100];
17710 int p;
17711 int base;
17712 int i;
17713 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17714 ? XEXP (operands[0], 0)
17715 : XEXP (XEXP (operands[0], 0), 0);
17716 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17717
17718 if (push_p)
17719 strcpy (pattern, "vpush%?.64\t{%P1");
17720 else
17721 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17722
17723 p = strlen (pattern);
17724
17725 gcc_assert (REG_P (operands[1]));
17726
17727 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17728 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17729 {
17730 p += sprintf (&pattern[p], ", d%d", base + i);
17731 }
17732 strcpy (&pattern[p], "}");
17733
17734 output_asm_insn (pattern, operands);
17735 return "";
17736 }
17737
17738
17739 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17740 number of bytes pushed. */
17741
17742 static int
17743 vfp_emit_fstmd (int base_reg, int count)
17744 {
17745 rtx par;
17746 rtx dwarf;
17747 rtx tmp, reg;
17748 int i;
17749
17750 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17751 register pairs are stored by a store multiple insn. We avoid this
17752 by pushing an extra pair. */
17753 if (count == 2 && !arm_arch6)
17754 {
17755 if (base_reg == LAST_VFP_REGNUM - 3)
17756 base_reg -= 2;
17757 count++;
17758 }
17759
17760 /* FSTMD may not store more than 16 doubleword registers at once. Split
17761 larger stores into multiple parts (up to a maximum of two, in
17762 practice). */
17763 if (count > 16)
17764 {
17765 int saved;
17766 /* NOTE: base_reg is an internal register number, so each D register
17767 counts as 2. */
17768 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17769 saved += vfp_emit_fstmd (base_reg, 16);
17770 return saved;
17771 }
17772
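/* PAR is the store-multiple insn itself; DWARF describes the same stores
   as individual frame-related sets so the unwinder can track them.  */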
17773 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17774 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17775
17776 reg = gen_rtx_REG (DFmode, base_reg);
17777 base_reg += 2;
17778
17779 XVECEXP (par, 0, 0)
17780 = gen_rtx_SET (gen_frame_mem
17781 (BLKmode,
17782 gen_rtx_PRE_MODIFY (Pmode,
17783 stack_pointer_rtx,
17784 plus_constant
17785 (Pmode, stack_pointer_rtx,
17786 - (count * 8)))
17787 ),
17788 gen_rtx_UNSPEC (BLKmode,
17789 gen_rtvec (1, reg),
17790 UNSPEC_PUSH_MULT));
17791
17792 tmp = gen_rtx_SET (stack_pointer_rtx,
17793 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17794 RTX_FRAME_RELATED_P (tmp) = 1;
17795 XVECEXP (dwarf, 0, 0) = tmp;
17796
17797 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17798 RTX_FRAME_RELATED_P (tmp) = 1;
17799 XVECEXP (dwarf, 0, 1) = tmp;
17800
17801 for (i = 1; i < count; i++)
17802 {
17803 reg = gen_rtx_REG (DFmode, base_reg);
17804 base_reg += 2;
17805 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17806
17807 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17808 plus_constant (Pmode,
17809 stack_pointer_rtx,
17810 i * 8)),
17811 reg);
17812 RTX_FRAME_RELATED_P (tmp) = 1;
17813 XVECEXP (dwarf, 0, i + 1) = tmp;
17814 }
17815
17816 par = emit_insn (par);
17817 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17818 RTX_FRAME_RELATED_P (par) = 1;
17819
17820 return count * 8;
17821 }
17822
17823 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
17824 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17825
17826 bool
17827 detect_cmse_nonsecure_call (tree addr)
17828 {
17829 if (!addr)
17830 return FALSE;
17831
17832 tree fntype = TREE_TYPE (addr);
17833 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17834 TYPE_ATTRIBUTES (fntype)))
17835 return TRUE;
17836 return FALSE;
17837 }
17838
17839
17840 /* Emit a call instruction with pattern PAT. ADDR is the address of
17841 the call target. */
17842
17843 void
17844 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17845 {
17846 rtx insn;
17847
17848 insn = emit_call_insn (pat);
17849
17850 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17851 If the call might use such an entry, add a use of the PIC register
17852 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17853 if (TARGET_VXWORKS_RTP
17854 && flag_pic
17855 && !sibcall
17856 && GET_CODE (addr) == SYMBOL_REF
17857 && (SYMBOL_REF_DECL (addr)
17858 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17859 : !SYMBOL_REF_LOCAL_P (addr)))
17860 {
17861 require_pic_register ();
17862 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17863 }
17864
17865 if (TARGET_AAPCS_BASED)
17866 {
17867 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17868 linker. We need to add an IP clobber to allow setting
17869 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17870 is not needed since it's a fixed register. */
17871 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17872 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17873 }
17874 }
17875
17876 /* Output a 'call' insn. */
17877 const char *
17878 output_call (rtx *operands)
17879 {
17880 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17881
17882 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17883 if (REGNO (operands[0]) == LR_REGNUM)
17884 {
17885 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17886 output_asm_insn ("mov%?\t%0, %|lr", operands);
17887 }
17888
17889 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17890
17891 if (TARGET_INTERWORK || arm_arch4t)
17892 output_asm_insn ("bx%?\t%0", operands);
17893 else
17894 output_asm_insn ("mov%?\t%|pc, %0", operands);
17895
17896 return "";
17897 }
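
/* For example (register choice is illustrative): on a pre-ARMv5 target a
   call through r2 comes out as either

     mov lr, pc
     bx  r2          @ TARGET_INTERWORK or ARMv4T

   or

     mov lr, pc
     mov pc, r2      @ otherwise

   with an extra "mov ip, lr" emitted first when the callee address is
   already in lr.  */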
17898
17899 /* Output a move from arm registers to arm registers of a long double
17900 OPERANDS[0] is the destination.
17901 OPERANDS[1] is the source. */
17902 const char *
17903 output_mov_long_double_arm_from_arm (rtx *operands)
17904 {
17905 /* We have to be careful here because the two might overlap. */
17906 int dest_start = REGNO (operands[0]);
17907 int src_start = REGNO (operands[1]);
17908 rtx ops[2];
17909 int i;
17910
17911 if (dest_start < src_start)
17912 {
17913 for (i = 0; i < 3; i++)
17914 {
17915 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17916 ops[1] = gen_rtx_REG (SImode, src_start + i);
17917 output_asm_insn ("mov%?\t%0, %1", ops);
17918 }
17919 }
17920 else
17921 {
17922 for (i = 2; i >= 0; i--)
17923 {
17924 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17925 ops[1] = gen_rtx_REG (SImode, src_start + i);
17926 output_asm_insn ("mov%?\t%0, %1", ops);
17927 }
17928 }
17929
17930 return "";
17931 }
17932
17933 void
17934 arm_emit_movpair (rtx dest, rtx src)
17935 {
17936 /* If the src is an immediate, simplify it. */
17937 if (CONST_INT_P (src))
17938 {
17939 HOST_WIDE_INT val = INTVAL (src);
17940 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17941 if ((val >> 16) & 0x0000ffff)
17942 {
17943 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17944 GEN_INT (16)),
17945 GEN_INT ((val >> 16) & 0x0000ffff));
17946 rtx_insn *insn = get_last_insn ();
17947 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17948 }
17949 return;
17950 }
17951 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17952 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17953 rtx_insn *insn = get_last_insn ();
17954 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17955 }
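
/* A rough illustration (constant and register invented for the example):
   for arm_emit_movpair (reg, GEN_INT (0x12345678)) the two SETs emitted
   above correspond approximately to a movw/movt pair,

     movw rD, #0x5678   @ dest = val & 0xffff
     movt rD, #0x1234   @ top halfword via the ZERO_EXTRACT

   while a symbolic SRC goes through the HIGH/LO_SUM path, i.e. the usual
   movw/movt pair with :lower16:/:upper16: relocations.  If the top
   halfword of the constant is zero, only the first instruction is
   emitted.  */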
17956
17957 /* Output a move between double words. It must be REG<-MEM
17958 or MEM<-REG. */
17959 const char *
17960 output_move_double (rtx *operands, bool emit, int *count)
17961 {
17962 enum rtx_code code0 = GET_CODE (operands[0]);
17963 enum rtx_code code1 = GET_CODE (operands[1]);
17964 rtx otherops[3];
17965 if (count)
17966 *count = 1;
17967
17968 /* The only case when this might happen is when
17969 you are looking at the length of a DImode instruction
17970 that has an invalid constant in it. */
17971 if (code0 == REG && code1 != MEM)
17972 {
17973 gcc_assert (!emit);
17974 *count = 2;
17975 return "";
17976 }
17977
17978 if (code0 == REG)
17979 {
17980 unsigned int reg0 = REGNO (operands[0]);
17981
17982 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17983
17984 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17985
17986 switch (GET_CODE (XEXP (operands[1], 0)))
17987 {
17988 case REG:
17989
17990 if (emit)
17991 {
17992 if (TARGET_LDRD
17993 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
17994 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17995 else
17996 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17997 }
17998 break;
17999
18000 case PRE_INC:
18001 gcc_assert (TARGET_LDRD);
18002 if (emit)
18003 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18004 break;
18005
18006 case PRE_DEC:
18007 if (emit)
18008 {
18009 if (TARGET_LDRD)
18010 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18011 else
18012 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18013 }
18014 break;
18015
18016 case POST_INC:
18017 if (emit)
18018 {
18019 if (TARGET_LDRD)
18020 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18021 else
18022 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18023 }
18024 break;
18025
18026 case POST_DEC:
18027 gcc_assert (TARGET_LDRD);
18028 if (emit)
18029 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18030 break;
18031
18032 case PRE_MODIFY:
18033 case POST_MODIFY:
18034 /* Autoincrement addressing modes should never have overlapping
18035 base and destination registers, and overlapping index registers
18036 are already prohibited, so this doesn't need to worry about
18037 fix_cm3_ldrd. */
18038 otherops[0] = operands[0];
18039 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18040 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18041
18042 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18043 {
18044 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18045 {
18046 /* Registers overlap so split out the increment. */
18047 if (emit)
18048 {
18049 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18050 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18051 }
18052 if (count)
18053 *count = 2;
18054 }
18055 else
18056 {
18057 /* Use a single insn if we can.
18058 FIXME: IWMMXT allows offsets larger than ldrd can
18059 handle, fix these up with a pair of ldr. */
18060 if (TARGET_THUMB2
18061 || !CONST_INT_P (otherops[2])
18062 || (INTVAL (otherops[2]) > -256
18063 && INTVAL (otherops[2]) < 256))
18064 {
18065 if (emit)
18066 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18067 }
18068 else
18069 {
18070 if (emit)
18071 {
18072 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18073 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18074 }
18075 if (count)
18076 *count = 2;
18077
18078 }
18079 }
18080 }
18081 else
18082 {
18083 /* Use a single insn if we can.
18084 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18085 fix these up with a pair of ldr. */
18086 if (TARGET_THUMB2
18087 || !CONST_INT_P (otherops[2])
18088 || (INTVAL (otherops[2]) > -256
18089 && INTVAL (otherops[2]) < 256))
18090 {
18091 if (emit)
18092 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18093 }
18094 else
18095 {
18096 if (emit)
18097 {
18098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18099 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18100 }
18101 if (count)
18102 *count = 2;
18103 }
18104 }
18105 break;
18106
18107 case LABEL_REF:
18108 case CONST:
18109 /* We might be able to use ldrd %0, %1 here. However, the range is
18110 different to ldr/adr, and it is broken on some ARMv7-M
18111 implementations. */
18112 /* Use the second register of the pair to avoid problematic
18113 overlap. */
18114 otherops[1] = operands[1];
18115 if (emit)
18116 output_asm_insn ("adr%?\t%0, %1", otherops);
18117 operands[1] = otherops[0];
18118 if (emit)
18119 {
18120 if (TARGET_LDRD)
18121 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18122 else
18123 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18124 }
18125
18126 if (count)
18127 *count = 2;
18128 break;
18129
18130 /* ??? This needs checking for thumb2. */
18131 default:
18132 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18133 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18134 {
18135 otherops[0] = operands[0];
18136 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18137 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18138
18139 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18140 {
18141 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18142 {
18143 switch ((int) INTVAL (otherops[2]))
18144 {
18145 case -8:
18146 if (emit)
18147 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18148 return "";
18149 case -4:
18150 if (TARGET_THUMB2)
18151 break;
18152 if (emit)
18153 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18154 return "";
18155 case 4:
18156 if (TARGET_THUMB2)
18157 break;
18158 if (emit)
18159 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18160 return "";
18161 }
18162 }
18163 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18164 operands[1] = otherops[0];
18165 if (TARGET_LDRD
18166 && (REG_P (otherops[2])
18167 || TARGET_THUMB2
18168 || (CONST_INT_P (otherops[2])
18169 && INTVAL (otherops[2]) > -256
18170 && INTVAL (otherops[2]) < 256)))
18171 {
18172 if (reg_overlap_mentioned_p (operands[0],
18173 otherops[2]))
18174 {
18175 /* Swap base and index registers over to
18176 avoid a conflict. */
18177 std::swap (otherops[1], otherops[2]);
18178 }
18179 /* If both registers conflict, it will usually
18180 have been fixed by a splitter. */
18181 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18182 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18183 {
18184 if (emit)
18185 {
18186 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18187 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18188 }
18189 if (count)
18190 *count = 2;
18191 }
18192 else
18193 {
18194 otherops[0] = operands[0];
18195 if (emit)
18196 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18197 }
18198 return "";
18199 }
18200
18201 if (CONST_INT_P (otherops[2]))
18202 {
18203 if (emit)
18204 {
18205 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18206 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18207 else
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18209 }
18210 }
18211 else
18212 {
18213 if (emit)
18214 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18215 }
18216 }
18217 else
18218 {
18219 if (emit)
18220 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18221 }
18222
18223 if (count)
18224 *count = 2;
18225
18226 if (TARGET_LDRD)
18227 return "ldrd%?\t%0, [%1]";
18228
18229 return "ldmia%?\t%1, %M0";
18230 }
18231 else
18232 {
18233 otherops[1] = adjust_address (operands[1], SImode, 4);
18234 /* Take care of overlapping base/data reg. */
18235 if (reg_mentioned_p (operands[0], operands[1]))
18236 {
18237 if (emit)
18238 {
18239 output_asm_insn ("ldr%?\t%0, %1", otherops);
18240 output_asm_insn ("ldr%?\t%0, %1", operands);
18241 }
18242 if (count)
18243 *count = 2;
18244
18245 }
18246 else
18247 {
18248 if (emit)
18249 {
18250 output_asm_insn ("ldr%?\t%0, %1", operands);
18251 output_asm_insn ("ldr%?\t%0, %1", otherops);
18252 }
18253 if (count)
18254 *count = 2;
18255 }
18256 }
18257 }
18258 }
18259 else
18260 {
18261 /* Constraints should ensure this. */
18262 gcc_assert (code0 == MEM && code1 == REG);
18263 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18264 || (TARGET_ARM && TARGET_LDRD));
18265
18266 switch (GET_CODE (XEXP (operands[0], 0)))
18267 {
18268 case REG:
18269 if (emit)
18270 {
18271 if (TARGET_LDRD)
18272 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18273 else
18274 output_asm_insn ("stm%?\t%m0, %M1", operands);
18275 }
18276 break;
18277
18278 case PRE_INC:
18279 gcc_assert (TARGET_LDRD);
18280 if (emit)
18281 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18282 break;
18283
18284 case PRE_DEC:
18285 if (emit)
18286 {
18287 if (TARGET_LDRD)
18288 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18289 else
18290 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18291 }
18292 break;
18293
18294 case POST_INC:
18295 if (emit)
18296 {
18297 if (TARGET_LDRD)
18298 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18299 else
18300 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18301 }
18302 break;
18303
18304 case POST_DEC:
18305 gcc_assert (TARGET_LDRD);
18306 if (emit)
18307 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18308 break;
18309
18310 case PRE_MODIFY:
18311 case POST_MODIFY:
18312 otherops[0] = operands[1];
18313 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18314 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18315
18316 /* IWMMXT allows offsets larger than ldrd can handle,
18317 fix these up with a pair of ldr. */
18318 if (!TARGET_THUMB2
18319 && CONST_INT_P (otherops[2])
18320 && (INTVAL (otherops[2]) <= -256
18321 || INTVAL (otherops[2]) >= 256))
18322 {
18323 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18324 {
18325 if (emit)
18326 {
18327 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18328 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18329 }
18330 if (count)
18331 *count = 2;
18332 }
18333 else
18334 {
18335 if (emit)
18336 {
18337 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18338 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18339 }
18340 if (count)
18341 *count = 2;
18342 }
18343 }
18344 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18345 {
18346 if (emit)
18347 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18348 }
18349 else
18350 {
18351 if (emit)
18352 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18353 }
18354 break;
18355
18356 case PLUS:
18357 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18358 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18359 {
18360 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18361 {
18362 case -8:
18363 if (emit)
18364 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18365 return "";
18366
18367 case -4:
18368 if (TARGET_THUMB2)
18369 break;
18370 if (emit)
18371 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18372 return "";
18373
18374 case 4:
18375 if (TARGET_THUMB2)
18376 break;
18377 if (emit)
18378 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18379 return "";
18380 }
18381 }
18382 if (TARGET_LDRD
18383 && (REG_P (otherops[2])
18384 || TARGET_THUMB2
18385 || (CONST_INT_P (otherops[2])
18386 && INTVAL (otherops[2]) > -256
18387 && INTVAL (otherops[2]) < 256)))
18388 {
18389 otherops[0] = operands[1];
18390 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18391 if (emit)
18392 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18393 return "";
18394 }
18395 /* Fall through */
18396
18397 default:
18398 otherops[0] = adjust_address (operands[0], SImode, 4);
18399 otherops[1] = operands[1];
18400 if (emit)
18401 {
18402 output_asm_insn ("str%?\t%1, %0", operands);
18403 output_asm_insn ("str%?\t%H1, %0", otherops);
18404 }
18405 if (count)
18406 *count = 2;
18407 }
18408 }
18409
18410 return "";
18411 }
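
/* A minimal illustration (register numbers are arbitrary): for a DImode
   load from a plain register address, (set (reg:DI r4) (mem:DI (reg:SI r0))),
   the REG case above prints

     ldrd  r4, [r0]            @ TARGET_LDRD, fix_cm3_ldrd overlap check not hit
     ldmia r0, {r4, r5}        @ otherwise

   When called with EMIT false nothing is printed; the function only reports
   through *COUNT how many instructions the chosen sequence would take.  */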
18412
18413 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18414 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18415
18416 const char *
18417 output_move_quad (rtx *operands)
18418 {
18419 if (REG_P (operands[0]))
18420 {
18421 /* Load, or reg->reg move. */
18422
18423 if (MEM_P (operands[1]))
18424 {
18425 switch (GET_CODE (XEXP (operands[1], 0)))
18426 {
18427 case REG:
18428 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18429 break;
18430
18431 case LABEL_REF:
18432 case CONST:
18433 output_asm_insn ("adr%?\t%0, %1", operands);
18434 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18435 break;
18436
18437 default:
18438 gcc_unreachable ();
18439 }
18440 }
18441 else
18442 {
18443 rtx ops[2];
18444 int dest, src, i;
18445
18446 gcc_assert (REG_P (operands[1]));
18447
18448 dest = REGNO (operands[0]);
18449 src = REGNO (operands[1]);
18450
18451 /* This seems pretty dumb, but hopefully GCC won't try to do it
18452 very often. */
18453 if (dest < src)
18454 for (i = 0; i < 4; i++)
18455 {
18456 ops[0] = gen_rtx_REG (SImode, dest + i);
18457 ops[1] = gen_rtx_REG (SImode, src + i);
18458 output_asm_insn ("mov%?\t%0, %1", ops);
18459 }
18460 else
18461 for (i = 3; i >= 0; i--)
18462 {
18463 ops[0] = gen_rtx_REG (SImode, dest + i);
18464 ops[1] = gen_rtx_REG (SImode, src + i);
18465 output_asm_insn ("mov%?\t%0, %1", ops);
18466 }
18467 }
18468 }
18469 else
18470 {
18471 gcc_assert (MEM_P (operands[0]));
18472 gcc_assert (REG_P (operands[1]));
18473 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18474
18475 switch (GET_CODE (XEXP (operands[0], 0)))
18476 {
18477 case REG:
18478 output_asm_insn ("stm%?\t%m0, %M1", operands);
18479 break;
18480
18481 default:
18482 gcc_unreachable ();
18483 }
18484 }
18485
18486 return "";
18487 }
18488
18489 /* Output a VFP load or store instruction. */
18490
18491 const char *
18492 output_move_vfp (rtx *operands)
18493 {
18494 rtx reg, mem, addr, ops[2];
18495 int load = REG_P (operands[0]);
18496 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18497 int sp = (!TARGET_VFP_FP16INST
18498 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18499 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18500 const char *templ;
18501 char buff[50];
18502 machine_mode mode;
18503
18504 reg = operands[!load];
18505 mem = operands[load];
18506
18507 mode = GET_MODE (reg);
18508
18509 gcc_assert (REG_P (reg));
18510 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18511 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18512 || mode == SFmode
18513 || mode == DFmode
18514 || mode == HImode
18515 || mode == SImode
18516 || mode == DImode
18517 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18518 gcc_assert (MEM_P (mem));
18519
18520 addr = XEXP (mem, 0);
18521
18522 switch (GET_CODE (addr))
18523 {
18524 case PRE_DEC:
18525 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18526 ops[0] = XEXP (addr, 0);
18527 ops[1] = reg;
18528 break;
18529
18530 case POST_INC:
18531 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18532 ops[0] = XEXP (addr, 0);
18533 ops[1] = reg;
18534 break;
18535
18536 default:
18537 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18538 ops[0] = reg;
18539 ops[1] = mem;
18540 break;
18541 }
18542
18543 sprintf (buff, templ,
18544 load ? "ld" : "st",
18545 dp ? "64" : sp ? "32" : "16",
18546 dp ? "P" : "",
18547 integer_p ? "\t%@ int" : "");
18548 output_asm_insn (buff, ops);
18549
18550 return "";
18551 }
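
/* Illustrative outputs (register numbers invented for the example): an
   SFmode load from [r0] goes through the default case and prints roughly

     vldr.32  s14, [r0]

   an SImode load gets a trailing "@ int" comment appended, and the
   PRE_DEC / POST_INC cases use the vstmdb / vldmia style multiple forms
   with a single register in the list.  */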
18552
18553 /* Output a Neon double-word or quad-word load or store, or a load
18554 or store for larger structure modes.
18555
18556 WARNING: The ordering of elements is weird in big-endian mode,
18557 because the EABI requires that vectors stored in memory appear
18558 as though they were stored by a VSTM instruction.
18559 GCC RTL defines element ordering based on in-memory order.
18560 This can be different from the architectural ordering of elements
18561 within a NEON register. The intrinsics defined in arm_neon.h use the
18562 NEON register element ordering, not the GCC RTL element ordering.
18563
18564 For example, the in-memory ordering of a big-endian quadword
18565 vector with 16-bit elements when stored from register pair {d0,d1}
18566 will be (lowest address first, d0[N] is NEON register element N):
18567
18568 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18569
18570 When necessary, quadword registers (dN, dN+1) are moved to ARM
18571 registers from rN in the order:
18572
18573 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18574
18575 So that STM/LDM can be used on vectors in ARM registers, and the
18576 same memory layout will result as if VSTM/VLDM were used.
18577
18578 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18579 possible, which allows use of appropriate alignment tags.
18580 Note that the choice of "64" is independent of the actual vector
18581 element size; this size simply ensures that the behavior is
18582 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18583
18584 Due to limitations of those instructions, use of VST1.64/VLD1.64
18585 is not possible if:
18586 - the address contains PRE_DEC, or
18587 - the mode refers to more than 4 double-word registers
18588
18589 In those cases, it would be possible to replace VSTM/VLDM by a
18590 sequence of instructions; this is not currently implemented since
18591 this is not certain to actually improve performance. */
18592
18593 const char *
18594 output_move_neon (rtx *operands)
18595 {
18596 rtx reg, mem, addr, ops[2];
18597 int regno, nregs, load = REG_P (operands[0]);
18598 const char *templ;
18599 char buff[50];
18600 machine_mode mode;
18601
18602 reg = operands[!load];
18603 mem = operands[load];
18604
18605 mode = GET_MODE (reg);
18606
18607 gcc_assert (REG_P (reg));
18608 regno = REGNO (reg);
18609 nregs = REG_NREGS (reg) / 2;
18610 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18611 || NEON_REGNO_OK_FOR_QUAD (regno));
18612 gcc_assert (VALID_NEON_DREG_MODE (mode)
18613 || VALID_NEON_QREG_MODE (mode)
18614 || VALID_NEON_STRUCT_MODE (mode));
18615 gcc_assert (MEM_P (mem));
18616
18617 addr = XEXP (mem, 0);
18618
18619 /* Strip off const from addresses like (const (plus (...))). */
18620 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18621 addr = XEXP (addr, 0);
18622
18623 switch (GET_CODE (addr))
18624 {
18625 case POST_INC:
18626 /* We have to use vldm / vstm for too-large modes. */
18627 if (nregs > 4)
18628 {
18629 templ = "v%smia%%?\t%%0!, %%h1";
18630 ops[0] = XEXP (addr, 0);
18631 }
18632 else
18633 {
18634 templ = "v%s1.64\t%%h1, %%A0";
18635 ops[0] = mem;
18636 }
18637 ops[1] = reg;
18638 break;
18639
18640 case PRE_DEC:
18641 /* We have to use vldm / vstm in this case, since there is no
18642 pre-decrement form of the vld1 / vst1 instructions. */
18643 templ = "v%smdb%%?\t%%0!, %%h1";
18644 ops[0] = XEXP (addr, 0);
18645 ops[1] = reg;
18646 break;
18647
18648 case POST_MODIFY:
18649 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18650 gcc_unreachable ();
18651
18652 case REG:
18653 /* We have to use vldm / vstm for too-large modes. */
18654 if (nregs > 1)
18655 {
18656 if (nregs > 4)
18657 templ = "v%smia%%?\t%%m0, %%h1";
18658 else
18659 templ = "v%s1.64\t%%h1, %%A0";
18660
18661 ops[0] = mem;
18662 ops[1] = reg;
18663 break;
18664 }
18665 /* Fall through. */
18666 case LABEL_REF:
18667 case PLUS:
18668 {
18669 int i;
18670 int overlap = -1;
18671 for (i = 0; i < nregs; i++)
18672 {
18673 /* We're only using DImode here because it's a convenient size. */
18674 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18675 ops[1] = adjust_address (mem, DImode, 8 * i);
18676 if (reg_overlap_mentioned_p (ops[0], mem))
18677 {
18678 gcc_assert (overlap == -1);
18679 overlap = i;
18680 }
18681 else
18682 {
18683 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18684 output_asm_insn (buff, ops);
18685 }
18686 }
18687 if (overlap != -1)
18688 {
18689 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18690 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18691 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18692 output_asm_insn (buff, ops);
18693 }
18694
18695 return "";
18696 }
18697
18698 default:
18699 gcc_unreachable ();
18700 }
18701
18702 sprintf (buff, templ, load ? "ld" : "st");
18703 output_asm_insn (buff, ops);
18704
18705 return "";
18706 }
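
/* A rough example (register numbers invented): loading a 16-byte quad
   vector with a POST_INC address, e.g. q0 from [r0]!, takes the POST_INC
   case with nregs == 2 and prints something of the form

     vld1.64  {d0, d1}, [r0]!

   whereas a structure mode spanning more than four D registers falls back
   to vldmia/vstmia, and a PLUS or LABEL_REF address is handled above by a
   sequence of individual vldr/vstr D-register moves.  */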
18707
18708 /* Compute and return the length of neon_mov<mode>, where <mode> is
18709 one of VSTRUCT modes: EI, OI, CI or XI. */
18710 int
18711 arm_attr_length_move_neon (rtx_insn *insn)
18712 {
18713 rtx reg, mem, addr;
18714 int load;
18715 machine_mode mode;
18716
18717 extract_insn_cached (insn);
18718
18719 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18720 {
18721 mode = GET_MODE (recog_data.operand[0]);
18722 switch (mode)
18723 {
18724 case E_EImode:
18725 case E_OImode:
18726 return 8;
18727 case E_CImode:
18728 return 12;
18729 case E_XImode:
18730 return 16;
18731 default:
18732 gcc_unreachable ();
18733 }
18734 }
18735
18736 load = REG_P (recog_data.operand[0]);
18737 reg = recog_data.operand[!load];
18738 mem = recog_data.operand[load];
18739
18740 gcc_assert (MEM_P (mem));
18741
18742 addr = XEXP (mem, 0);
18743
18744 /* Strip off const from addresses like (const (plus (...))). */
18745 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18746 addr = XEXP (addr, 0);
18747
18748 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18749 {
18750 int insns = REG_NREGS (reg) / 2;
18751 return insns * 4;
18752 }
18753 else
18754 return 4;
18755 }
18756
18757 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18758 return zero. */
18759
18760 int
18761 arm_address_offset_is_imm (rtx_insn *insn)
18762 {
18763 rtx mem, addr;
18764
18765 extract_insn_cached (insn);
18766
18767 if (REG_P (recog_data.operand[0]))
18768 return 0;
18769
18770 mem = recog_data.operand[0];
18771
18772 gcc_assert (MEM_P (mem));
18773
18774 addr = XEXP (mem, 0);
18775
18776 if (REG_P (addr)
18777 || (GET_CODE (addr) == PLUS
18778 && REG_P (XEXP (addr, 0))
18779 && CONST_INT_P (XEXP (addr, 1))))
18780 return 1;
18781 else
18782 return 0;
18783 }
18784
18785 /* Output an ADD r, s, #n where n may be too big for one instruction.
18786 If adding zero to one register, output nothing. */
18787 const char *
18788 output_add_immediate (rtx *operands)
18789 {
18790 HOST_WIDE_INT n = INTVAL (operands[2]);
18791
18792 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18793 {
18794 if (n < 0)
18795 output_multi_immediate (operands,
18796 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18797 -n);
18798 else
18799 output_multi_immediate (operands,
18800 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18801 n);
18802 }
18803
18804 return "";
18805 }
18806
18807 /* Output a multiple immediate operation.
18808 OPERANDS is the vector of operands referred to in the output patterns.
18809 INSTR1 is the output pattern to use for the first constant.
18810 INSTR2 is the output pattern to use for subsequent constants.
18811 IMMED_OP is the index of the constant slot in OPERANDS.
18812 N is the constant value. */
18813 static const char *
18814 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18815 int immed_op, HOST_WIDE_INT n)
18816 {
18817 #if HOST_BITS_PER_WIDE_INT > 32
18818 n &= 0xffffffff;
18819 #endif
18820
18821 if (n == 0)
18822 {
18823 /* Quick and easy output. */
18824 operands[immed_op] = const0_rtx;
18825 output_asm_insn (instr1, operands);
18826 }
18827 else
18828 {
18829 int i;
18830 const char * instr = instr1;
18831
18832 /* Note that n is never zero here (which would give no output). */
18833 for (i = 0; i < 32; i += 2)
18834 {
18835 if (n & (3 << i))
18836 {
18837 operands[immed_op] = GEN_INT (n & (255 << i));
18838 output_asm_insn (instr, operands);
18839 instr = instr2;
18840 i += 6;
18841 }
18842 }
18843 }
18844
18845 return "";
18846 }
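
/* Worked example (purely illustrative; rD/rS are placeholder registers):
   output_add_immediate with n = 0x12345 walks the constant two bits at a
   time and peels off one 8-bit chunk at each even bit position where
   something is set, giving

     add rD, rS, #0x45
     add rD, rD, #0x2300
     add rD, rD, #0x10000

   Each chunk is an 8-bit value at an even bit offset and is therefore a
   valid ARM rotated immediate, and the three immediates sum back to
   0x12345.  */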
18847
18848 /* Return the name of a shifter operation. */
18849 static const char *
18850 arm_shift_nmem (enum rtx_code code)
18851 {
18852 switch (code)
18853 {
18854 case ASHIFT:
18855 return ARM_LSL_NAME;
18856
18857 case ASHIFTRT:
18858 return "asr";
18859
18860 case LSHIFTRT:
18861 return "lsr";
18862
18863 case ROTATERT:
18864 return "ror";
18865
18866 default:
18867 abort ();
18868 }
18869 }
18870
18871 /* Return the appropriate ARM instruction for the operation code.
18872 The returned result should not be overwritten. OP is the rtx of the
18873 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18874 was shifted. */
18875 const char *
18876 arithmetic_instr (rtx op, int shift_first_arg)
18877 {
18878 switch (GET_CODE (op))
18879 {
18880 case PLUS:
18881 return "add";
18882
18883 case MINUS:
18884 return shift_first_arg ? "rsb" : "sub";
18885
18886 case IOR:
18887 return "orr";
18888
18889 case XOR:
18890 return "eor";
18891
18892 case AND:
18893 return "and";
18894
18895 case ASHIFT:
18896 case ASHIFTRT:
18897 case LSHIFTRT:
18898 case ROTATERT:
18899 return arm_shift_nmem (GET_CODE (op));
18900
18901 default:
18902 gcc_unreachable ();
18903 }
18904 }
18905
18906 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18907 for the operation code. The returned result should not be overwritten.
18908 OP is the rtx code of the shift.
18909 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18910 shift. */
18911 static const char *
18912 shift_op (rtx op, HOST_WIDE_INT *amountp)
18913 {
18914 const char * mnem;
18915 enum rtx_code code = GET_CODE (op);
18916
18917 switch (code)
18918 {
18919 case ROTATE:
18920 if (!CONST_INT_P (XEXP (op, 1)))
18921 {
18922 output_operand_lossage ("invalid shift operand");
18923 return NULL;
18924 }
18925
18926 code = ROTATERT;
18927 *amountp = 32 - INTVAL (XEXP (op, 1));
18928 mnem = "ror";
18929 break;
18930
18931 case ASHIFT:
18932 case ASHIFTRT:
18933 case LSHIFTRT:
18934 case ROTATERT:
18935 mnem = arm_shift_nmem (code);
18936 if (CONST_INT_P (XEXP (op, 1)))
18937 {
18938 *amountp = INTVAL (XEXP (op, 1));
18939 }
18940 else if (REG_P (XEXP (op, 1)))
18941 {
18942 *amountp = -1;
18943 return mnem;
18944 }
18945 else
18946 {
18947 output_operand_lossage ("invalid shift operand");
18948 return NULL;
18949 }
18950 break;
18951
18952 case MULT:
18953 /* We never have to worry about the amount being other than a
18954 power of 2, since this case can never be reloaded from a reg. */
18955 if (!CONST_INT_P (XEXP (op, 1)))
18956 {
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18959 }
18960
18961 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18962
18963 /* Amount must be a power of two. */
18964 if (*amountp & (*amountp - 1))
18965 {
18966 output_operand_lossage ("invalid shift operand");
18967 return NULL;
18968 }
18969
18970 *amountp = exact_log2 (*amountp);
18971 gcc_assert (IN_RANGE (*amountp, 0, 31));
18972 return ARM_LSL_NAME;
18973
18974 default:
18975 output_operand_lossage ("invalid shift operand");
18976 return NULL;
18977 }
18978
18979 /* This is not 100% correct, but follows from the desire to merge
18980 multiplication by a power of 2 with the recognizer for a
18981 shift. >=32 is not a valid shift for "lsl", so we must try to
18982 output a shift that produces the correct arithmetical result.
18983 Using lsr #32 is identical except for the fact that the carry bit
18984 is not set correctly if we set the flags; but we never use the
18985 carry bit from such an operation, so we can ignore that. */
18986 if (code == ROTATERT)
18987 /* Rotate is just modulo 32. */
18988 *amountp &= 31;
18989 else if (*amountp != (*amountp & 31))
18990 {
18991 if (code == ASHIFT)
18992 mnem = "lsr";
18993 *amountp = 32;
18994 }
18995
18996 /* Shifts of 0 are no-ops. */
18997 if (*amountp == 0)
18998 return NULL;
18999
19000 return mnem;
19001 }
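
/* For illustration: (mult (reg) (const_int 8)) is treated as a left shift,
   so *AMOUNTP becomes 3 and ARM_LSL_NAME is returned, printing as an
   "lsl #3" shifter operand; a shift such as (ashift (reg) (const_int 0))
   falls through to the final check and returns NULL, so no shifter
   operand is printed at all.  */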
19002
19003 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19004 because /bin/as is horribly restrictive. The judgement about
19005 whether or not each character is 'printable' (and can be output as
19006 is) or not (and must be printed with an octal escape) must be made
19007 with reference to the *host* character set -- the situation is
19008 similar to that discussed in the comments above pp_c_char in
19009 c-pretty-print.c. */
19010
19011 #define MAX_ASCII_LEN 51
19012
19013 void
19014 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19015 {
19016 int i;
19017 int len_so_far = 0;
19018
19019 fputs ("\t.ascii\t\"", stream);
19020
19021 for (i = 0; i < len; i++)
19022 {
19023 int c = p[i];
19024
19025 if (len_so_far >= MAX_ASCII_LEN)
19026 {
19027 fputs ("\"\n\t.ascii\t\"", stream);
19028 len_so_far = 0;
19029 }
19030
19031 if (ISPRINT (c))
19032 {
19033 if (c == '\\' || c == '\"')
19034 {
19035 putc ('\\', stream);
19036 len_so_far++;
19037 }
19038 putc (c, stream);
19039 len_so_far++;
19040 }
19041 else
19042 {
19043 fprintf (stream, "\\%03o", c);
19044 len_so_far += 4;
19045 }
19046 }
19047
19048 fputs ("\"\n", stream);
19049 }
19050 \f
19051 /* Whether a register is callee saved or not. This is necessary because, when
19052 optimizing for size on Thumb-1 targets, high registers are marked as caller
19053 saved even though they are callee saved, in order to discourage their use. */
19054 #define callee_saved_reg_p(reg) \
19055 (!call_used_regs[reg] \
19056 || (TARGET_THUMB1 && optimize_size \
19057 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19058
19059 /* Compute the register save mask for registers 0 through 12
19060 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19061
19062 static unsigned long
19063 arm_compute_save_reg0_reg12_mask (void)
19064 {
19065 unsigned long func_type = arm_current_func_type ();
19066 unsigned long save_reg_mask = 0;
19067 unsigned int reg;
19068
19069 if (IS_INTERRUPT (func_type))
19070 {
19071 unsigned int max_reg;
19072 /* Interrupt functions must not corrupt any registers,
19073 even call clobbered ones. If this is a leaf function
19074 we can just examine the registers used by the RTL, but
19075 otherwise we have to assume that whatever function is
19076 called might clobber anything, and so we have to save
19077 all the call-clobbered registers as well. */
19078 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19079 /* FIQ handlers have registers r8 - r12 banked, so
19080 we only need to check r0 - r7. Normal ISRs only
19081 bank r14 and r15, so we must check up to r12.
19082 r13 is the stack pointer which is always preserved,
19083 so we do not need to consider it here. */
19084 max_reg = 7;
19085 else
19086 max_reg = 12;
19087
19088 for (reg = 0; reg <= max_reg; reg++)
19089 if (df_regs_ever_live_p (reg)
19090 || (! crtl->is_leaf && call_used_regs[reg]))
19091 save_reg_mask |= (1 << reg);
19092
19093 /* Also save the pic base register if necessary. */
19094 if (flag_pic
19095 && !TARGET_SINGLE_PIC_BASE
19096 && arm_pic_register != INVALID_REGNUM
19097 && crtl->uses_pic_offset_table)
19098 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19099 }
19100 else if (IS_VOLATILE (func_type))
19101 {
19102 /* For noreturn functions we historically omitted register saves
19103 altogether. However this really messes up debugging. As a
19104 compromise save just the frame pointers. Combined with the link
19105 register saved elsewhere this should be sufficient to get
19106 a backtrace. */
19107 if (frame_pointer_needed)
19108 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19109 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19110 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19111 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19112 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19113 }
19114 else
19115 {
19116 /* In the normal case we only need to save those registers
19117 which are call saved and which are used by this function. */
19118 for (reg = 0; reg <= 11; reg++)
19119 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19120 save_reg_mask |= (1 << reg);
19121
19122 /* Handle the frame pointer as a special case. */
19123 if (frame_pointer_needed)
19124 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19125
19126 /* If we aren't loading the PIC register,
19127 don't stack it even though it may be live. */
19128 if (flag_pic
19129 && !TARGET_SINGLE_PIC_BASE
19130 && arm_pic_register != INVALID_REGNUM
19131 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19132 || crtl->uses_pic_offset_table))
19133 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19134
19135 /* The prologue will copy SP into R0, so save it. */
19136 if (IS_STACKALIGN (func_type))
19137 save_reg_mask |= 1;
19138 }
19139
19140 /* Save registers so the exception handler can modify them. */
19141 if (crtl->calls_eh_return)
19142 {
19143 unsigned int i;
19144
19145 for (i = 0; ; i++)
19146 {
19147 reg = EH_RETURN_DATA_REGNO (i);
19148 if (reg == INVALID_REGNUM)
19149 break;
19150 save_reg_mask |= 1 << reg;
19151 }
19152 }
19153
19154 return save_reg_mask;
19155 }
19156
19157 /* Return true if r3 is live at the start of the function. */
19158
19159 static bool
19160 arm_r3_live_at_start_p (void)
19161 {
19162 /* Just look at cfg info, which is still close enough to correct at this
19163 point. This gives false positives for broken functions that might use
19164 uninitialized data that happens to be allocated in r3, but who cares? */
19165 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19166 }
19167
19168 /* Compute the number of bytes used to store the static chain register on the
19169 stack, above the stack frame. We need to know this accurately to get the
19170 alignment of the rest of the stack frame correct. */
19171
19172 static int
19173 arm_compute_static_chain_stack_bytes (void)
19174 {
19175 /* See the defining assertion in arm_expand_prologue. */
19176 if (IS_NESTED (arm_current_func_type ())
19177 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19178 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19179 || flag_stack_clash_protection)
19180 && !df_regs_ever_live_p (LR_REGNUM)))
19181 && arm_r3_live_at_start_p ()
19182 && crtl->args.pretend_args_size == 0)
19183 return 4;
19184
19185 return 0;
19186 }
19187
19188 /* Compute a bit mask of which core registers need to be
19189 saved on the stack for the current function.
19190 This is used by arm_compute_frame_layout, which may add extra registers. */
19191
19192 static unsigned long
19193 arm_compute_save_core_reg_mask (void)
19194 {
19195 unsigned int save_reg_mask = 0;
19196 unsigned long func_type = arm_current_func_type ();
19197 unsigned int reg;
19198
19199 if (IS_NAKED (func_type))
19200 /* This should never really happen. */
19201 return 0;
19202
19203 /* If we are creating a stack frame, then we must save the frame pointer,
19204 IP (which will hold the old stack pointer), LR and the PC. */
19205 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19206 save_reg_mask |=
19207 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19208 | (1 << IP_REGNUM)
19209 | (1 << LR_REGNUM)
19210 | (1 << PC_REGNUM);
19211
19212 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19213
19214 /* Decide if we need to save the link register.
19215 Interrupt routines have their own banked link register,
19216 so they never need to save it.
19217 Otherwise if we do not use the link register we do not need to save
19218 it. If we are pushing other registers onto the stack however, we
19219 can save an instruction in the epilogue by pushing the link register
19220 now and then popping it back into the PC. This incurs extra memory
19221 accesses though, so we only do it when optimizing for size, and only
19222 if we know that we will not need a fancy return sequence. */
19223 if (df_regs_ever_live_p (LR_REGNUM)
19224 || (save_reg_mask
19225 && optimize_size
19226 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19227 && !crtl->tail_call_emit
19228 && !crtl->calls_eh_return))
19229 save_reg_mask |= 1 << LR_REGNUM;
19230
19231 if (cfun->machine->lr_save_eliminated)
19232 save_reg_mask &= ~ (1 << LR_REGNUM);
19233
19234 if (TARGET_REALLY_IWMMXT
19235 && ((bit_count (save_reg_mask)
19236 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19237 arm_compute_static_chain_stack_bytes ())
19238 ) % 2) != 0)
19239 {
19240 /* The total number of registers that are going to be pushed
19241 onto the stack is odd. We need to ensure that the stack
19242 is 64-bit aligned before we start to save iWMMXt registers,
19243 and also before we start to create locals. (A local variable
19244 might be a double or long long which we will load/store using
19245 an iWMMXt instruction). Therefore we need to push another
19246 ARM register, so that the stack will be 64-bit aligned. We
19247 try to avoid using the arg registers (r0 - r3) as they might be
19248 used to pass values in a tail call. */
19249 for (reg = 4; reg <= 12; reg++)
19250 if ((save_reg_mask & (1 << reg)) == 0)
19251 break;
19252
19253 if (reg <= 12)
19254 save_reg_mask |= (1 << reg);
19255 else
19256 {
19257 cfun->machine->sibcall_blocked = 1;
19258 save_reg_mask |= (1 << 3);
19259 }
19260 }
19261
19262 /* We may need to push an additional register for use initializing the
19263 PIC base register. */
19264 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19265 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19266 {
19267 reg = thumb_find_work_register (1 << 4);
19268 if (!call_used_regs[reg])
19269 save_reg_mask |= (1 << reg);
19270 }
19271
19272 return save_reg_mask;
19273 }
19274
19275 /* Compute a bit mask of which core registers need to be
19276 saved on the stack for the current function. */
19277 static unsigned long
19278 thumb1_compute_save_core_reg_mask (void)
19279 {
19280 unsigned long mask;
19281 unsigned reg;
19282
19283 mask = 0;
19284 for (reg = 0; reg < 12; reg ++)
19285 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19286 mask |= 1 << reg;
19287
19288 /* Handle the frame pointer as a special case. */
19289 if (frame_pointer_needed)
19290 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19291
19292 if (flag_pic
19293 && !TARGET_SINGLE_PIC_BASE
19294 && arm_pic_register != INVALID_REGNUM
19295 && crtl->uses_pic_offset_table)
19296 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19297
19298 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19299 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19300 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19301
19302 /* LR will also be pushed if any lo regs are pushed. */
19303 if (mask & 0xff || thumb_force_lr_save ())
19304 mask |= (1 << LR_REGNUM);
19305
19306 /* Make sure we have a low work register if we need one.
19307 We will need one if we are going to push a high register,
19308 but we are not currently intending to push a low register. */
19309 if ((mask & 0xff) == 0
19310 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19311 {
19312 /* Use thumb_find_work_register to choose which register
19313 we will use. If the register is live then we will
19314 have to push it. Use LAST_LO_REGNUM as our fallback
19315 choice for the register to select. */
19316 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19317 /* Make sure the register returned by thumb_find_work_register is
19318 not part of the return value. */
19319 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19320 reg = LAST_LO_REGNUM;
19321
19322 if (callee_saved_reg_p (reg))
19323 mask |= 1 << reg;
19324 }
19325
19326 /* The 504 below is 8 bytes less than 512 because there are two possible
19327 alignment words. We can't tell here if they will be present or not, so we
19328 have to play it safe and assume that they are. */
19329 if ((CALLER_INTERWORKING_SLOT_SIZE +
19330 ROUND_UP_WORD (get_frame_size ()) +
19331 crtl->outgoing_args_size) >= 504)
19332 {
19333 /* This is the same as the code in thumb1_expand_prologue() which
19334 determines which register to use for stack decrement. */
19335 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19336 if (mask & (1 << reg))
19337 break;
19338
19339 if (reg > LAST_LO_REGNUM)
19340 {
19341 /* Make sure we have a register available for stack decrement. */
19342 mask |= 1 << LAST_LO_REGNUM;
19343 }
19344 }
19345
19346 return mask;
19347 }
19348
19349
19350 /* Return the number of bytes required to save VFP registers. */
19351 static int
19352 arm_get_vfp_saved_size (void)
19353 {
19354 unsigned int regno;
19355 int count;
19356 int saved;
19357
19358 saved = 0;
19359 /* Space for saved VFP registers. */
19360 if (TARGET_HARD_FLOAT)
19361 {
19362 count = 0;
19363 for (regno = FIRST_VFP_REGNUM;
19364 regno < LAST_VFP_REGNUM;
19365 regno += 2)
19366 {
19367 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19368 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19369 {
19370 if (count > 0)
19371 {
19372 /* Work around the ARM10 VFPr1 bug. */
19373 if (count == 2 && !arm_arch6)
19374 count++;
19375 saved += count * 8;
19376 }
19377 count = 0;
19378 }
19379 else
19380 count++;
19381 }
19382 if (count > 0)
19383 {
19384 if (count == 2 && !arm_arch6)
19385 count++;
19386 saved += count * 8;
19387 }
19388 }
19389 return saved;
19390 }
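
/* For example (hypothetical liveness): if d8-d11 are live and call-saved,
   the contiguous run contributes 4 * 8 = 32 bytes; if only the pair d8/d9
   were live on a pre-v6 core, the ARM10 VFPr1 workaround above rounds the
   pair up to three registers, i.e. 24 bytes.  */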
19391
19392
19393 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19394 everything bar the final return instruction. If simple_return is true,
19395 then do not output epilogue, because it has already been emitted in RTL. */
19396 const char *
19397 output_return_instruction (rtx operand, bool really_return, bool reverse,
19398 bool simple_return)
19399 {
19400 char conditional[10];
19401 char instr[100];
19402 unsigned reg;
19403 unsigned long live_regs_mask;
19404 unsigned long func_type;
19405 arm_stack_offsets *offsets;
19406
19407 func_type = arm_current_func_type ();
19408
19409 if (IS_NAKED (func_type))
19410 return "";
19411
19412 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19413 {
19414 /* If this function was declared non-returning, and we have
19415 found a tail call, then we have to trust that the called
19416 function won't return. */
19417 if (really_return)
19418 {
19419 rtx ops[2];
19420
19421 /* Otherwise, trap an attempted return by aborting. */
19422 ops[0] = operand;
19423 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19424 : "abort");
19425 assemble_external_libcall (ops[1]);
19426 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19427 }
19428
19429 return "";
19430 }
19431
19432 gcc_assert (!cfun->calls_alloca || really_return);
19433
19434 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19435
19436 cfun->machine->return_used_this_function = 1;
19437
19438 offsets = arm_get_frame_offsets ();
19439 live_regs_mask = offsets->saved_regs_mask;
19440
19441 if (!simple_return && live_regs_mask)
19442 {
19443 const char * return_reg;
19444
19445 /* If we do not have any special requirements for function exit
19446 (e.g. interworking) then we can load the return address
19447 directly into the PC. Otherwise we must load it into LR. */
19448 if (really_return
19449 && !IS_CMSE_ENTRY (func_type)
19450 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19451 return_reg = reg_names[PC_REGNUM];
19452 else
19453 return_reg = reg_names[LR_REGNUM];
19454
19455 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19456 {
19457 /* There are three possible reasons for the IP register
19458 being saved: 1) a stack frame was created, in which case
19459 IP contains the old stack pointer, or 2) an ISR routine
19460 corrupted it, or 3) it was saved to align the stack on
19461 iWMMXt. In case 1, restore IP into SP, otherwise just
19462 restore IP. */
19463 if (frame_pointer_needed)
19464 {
19465 live_regs_mask &= ~ (1 << IP_REGNUM);
19466 live_regs_mask |= (1 << SP_REGNUM);
19467 }
19468 else
19469 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19470 }
19471
19472 /* On some ARM architectures it is faster to use LDR rather than
19473 LDM to load a single register. On other architectures, the
19474 cost is the same. In 26 bit mode, or for exception handlers,
19475 we have to use LDM to load the PC so that the CPSR is also
19476 restored. */
19477 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19478 if (live_regs_mask == (1U << reg))
19479 break;
19480
19481 if (reg <= LAST_ARM_REGNUM
19482 && (reg != LR_REGNUM
19483 || ! really_return
19484 || ! IS_INTERRUPT (func_type)))
19485 {
19486 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19487 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19488 }
19489 else
19490 {
19491 char *p;
19492 int first = 1;
19493
19494 /* Generate the load multiple instruction to restore the
19495 registers. Note we can get here, even if
19496 frame_pointer_needed is true, but only if sp already
19497 points to the base of the saved core registers. */
19498 if (live_regs_mask & (1 << SP_REGNUM))
19499 {
19500 unsigned HOST_WIDE_INT stack_adjust;
19501
19502 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19503 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19504
19505 if (stack_adjust && arm_arch5 && TARGET_ARM)
19506 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19507 else
19508 {
19509 /* If we can't use ldmib (SA110 bug),
19510 then try to pop r3 instead. */
19511 if (stack_adjust)
19512 live_regs_mask |= 1 << 3;
19513
19514 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19515 }
19516 }
19517 /* For interrupt returns we have to use an LDM rather than
19518 a POP so that we can use the exception return variant. */
19519 else if (IS_INTERRUPT (func_type))
19520 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19521 else
19522 sprintf (instr, "pop%s\t{", conditional);
19523
19524 p = instr + strlen (instr);
19525
19526 for (reg = 0; reg <= SP_REGNUM; reg++)
19527 if (live_regs_mask & (1 << reg))
19528 {
19529 int l = strlen (reg_names[reg]);
19530
19531 if (first)
19532 first = 0;
19533 else
19534 {
19535 memcpy (p, ", ", 2);
19536 p += 2;
19537 }
19538
19539 memcpy (p, "%|", 2);
19540 memcpy (p + 2, reg_names[reg], l);
19541 p += l + 2;
19542 }
19543
19544 if (live_regs_mask & (1 << LR_REGNUM))
19545 {
19546 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19547 /* If returning from an interrupt, restore the CPSR. */
19548 if (IS_INTERRUPT (func_type))
19549 strcat (p, "^");
19550 }
19551 else
19552 strcpy (p, "}");
19553 }
19554
19555 output_asm_insn (instr, & operand);
19556
19557 /* See if we need to generate an extra instruction to
19558 perform the actual function return. */
19559 if (really_return
19560 && func_type != ARM_FT_INTERWORKED
19561 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19562 {
19563 /* The return has already been handled
19564 by loading the LR into the PC. */
19565 return "";
19566 }
19567 }
19568
19569 if (really_return)
19570 {
19571 switch ((int) ARM_FUNC_TYPE (func_type))
19572 {
19573 case ARM_FT_ISR:
19574 case ARM_FT_FIQ:
19575 /* ??? This is wrong for unified assembly syntax. */
19576 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19577 break;
19578
19579 case ARM_FT_INTERWORKED:
19580 gcc_assert (arm_arch5 || arm_arch4t);
19581 sprintf (instr, "bx%s\t%%|lr", conditional);
19582 break;
19583
19584 case ARM_FT_EXCEPTION:
19585 /* ??? This is wrong for unified assembly syntax. */
19586 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19587 break;
19588
19589 default:
19590 if (IS_CMSE_ENTRY (func_type))
19591 {
19592 /* Check if we have to clear the 'GE bits', which are only used if
19593 parallel add and subtraction instructions are available. */
19594 if (TARGET_INT_SIMD)
19595 snprintf (instr, sizeof (instr),
19596 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19597 else
19598 snprintf (instr, sizeof (instr),
19599 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19600
19601 output_asm_insn (instr, & operand);
19602 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19603 {
19604 /* Clear the cumulative exception-status bits (0-4,7) and the
19605 condition code bits (28-31) of the FPSCR. We need to
19606 remember to clear the first scratch register used (IP) and
19607 save and restore the second (r4). */
19608 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19609 output_asm_insn (instr, & operand);
19610 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19611 output_asm_insn (instr, & operand);
19612 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19613 output_asm_insn (instr, & operand);
19614 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19615 output_asm_insn (instr, & operand);
19616 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19617 output_asm_insn (instr, & operand);
19618 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19619 output_asm_insn (instr, & operand);
19620 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19621 output_asm_insn (instr, & operand);
19622 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19623 output_asm_insn (instr, & operand);
19624 }
19625 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19626 }
19627 /* Use bx if it's available. */
19628 else if (arm_arch5 || arm_arch4t)
19629 sprintf (instr, "bx%s\t%%|lr", conditional);
19630 else
19631 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19632 break;
19633 }
19634
19635 output_asm_insn (instr, & operand);
19636 }
19637
19638 return "";
19639 }
19640
19641 /* Output in FILE asm statements needed to declare the NAME of the function
19642 defined by its DECL node. */
19643
19644 void
19645 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19646 {
19647 size_t cmse_name_len;
19648 char *cmse_name = 0;
19649 char cmse_prefix[] = "__acle_se_";
19650
19651 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19652 extra function label for each function with the 'cmse_nonsecure_entry'
19653 attribute. This extra function label should be prepended with
19654 '__acle_se_', telling the linker that it needs to create secure gateway
19655 veneers for this function. */
19656 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19657 DECL_ATTRIBUTES (decl)))
19658 {
19659 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19660 cmse_name = XALLOCAVEC (char, cmse_name_len);
19661 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19662 targetm.asm_out.globalize_label (file, cmse_name);
19663
19664 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19665 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19666 }
19667
19668 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19669 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19670 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19671 ASM_OUTPUT_LABEL (file, name);
19672
19673 if (cmse_name)
19674 ASM_OUTPUT_LABEL (file, cmse_name);
19675
19676 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19677 }
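
/* Sketch of the effect (directive spelling depends on the target macros
   involved, so this is only approximate): for a function "foo" carrying the
   cmse_nonsecure_entry attribute the output contains, in addition to the
   usual unwinding and mode directives, something like

     .global __acle_se_foo
     .type   __acle_se_foo, %function
     .type   foo, %function
   foo:
   __acle_se_foo:

   so the linker can build the secure gateway veneer for the entry point.  */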
19678
19679 /* Write the function name into the code section, directly preceding
19680 the function prologue.
19681
19682 Code will be output similar to this:
19683 t0
19684 .ascii "arm_poke_function_name", 0
19685 .align
19686 t1
19687 .word 0xff000000 + (t1 - t0)
19688 arm_poke_function_name
19689 mov ip, sp
19690 stmfd sp!, {fp, ip, lr, pc}
19691 sub fp, ip, #4
19692
19693 When performing a stack backtrace, code can inspect the value
19694 of 'pc' stored at 'fp' + 0. If the trace function then looks
19695 at location pc - 12 and the top 8 bits are set, then we know
19696 that there is a function name embedded immediately preceding this
19697 location, and that its length is (pc[-3] & ~0xff000000).
19698
19699 We assume that pc is declared as a pointer to an unsigned long.
19700
19701 It is of no benefit to output the function name if we are assembling
19702 a leaf function. These function types will not contain a stack
19703 backtrace structure, therefore it is not possible to determine the
19704 function name. */
19705 void
19706 arm_poke_function_name (FILE *stream, const char *name)
19707 {
19708 unsigned long alignlength;
19709 unsigned long length;
19710 rtx x;
19711
19712 length = strlen (name) + 1;
19713 alignlength = ROUND_UP_WORD (length);
19714
19715 ASM_OUTPUT_ASCII (stream, name, length);
19716 ASM_OUTPUT_ALIGN (stream, 2);
19717 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19718 assemble_aligned_integer (UNITS_PER_WORD, x);
19719 }
19720
19721 /* Place some comments into the assembler stream
19722 describing the current function. */
19723 static void
19724 arm_output_function_prologue (FILE *f)
19725 {
19726 unsigned long func_type;
19727
19728 /* Sanity check. */
19729 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19730
19731 func_type = arm_current_func_type ();
19732
19733 switch ((int) ARM_FUNC_TYPE (func_type))
19734 {
19735 default:
19736 case ARM_FT_NORMAL:
19737 break;
19738 case ARM_FT_INTERWORKED:
19739 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19740 break;
19741 case ARM_FT_ISR:
19742 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19743 break;
19744 case ARM_FT_FIQ:
19745 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19746 break;
19747 case ARM_FT_EXCEPTION:
19748 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19749 break;
19750 }
19751
19752 if (IS_NAKED (func_type))
19753 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19754
19755 if (IS_VOLATILE (func_type))
19756 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19757
19758 if (IS_NESTED (func_type))
19759 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19760 if (IS_STACKALIGN (func_type))
19761 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19762 if (IS_CMSE_ENTRY (func_type))
19763 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19764
19765 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19766 crtl->args.size,
19767 crtl->args.pretend_args_size,
19768 (HOST_WIDE_INT) get_frame_size ());
19769
19770 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19771 frame_pointer_needed,
19772 cfun->machine->uses_anonymous_args);
19773
19774 if (cfun->machine->lr_save_eliminated)
19775 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19776
19777 if (crtl->calls_eh_return)
19778 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19779
19780 }
19781
19782 static void
19783 arm_output_function_epilogue (FILE *)
19784 {
19785 arm_stack_offsets *offsets;
19786
19787 if (TARGET_THUMB1)
19788 {
19789 int regno;
19790
19791 /* Emit any call-via-reg trampolines that are needed for v4t support
19792 of call_reg and call_value_reg type insns. */
19793 for (regno = 0; regno < LR_REGNUM; regno++)
19794 {
19795 rtx label = cfun->machine->call_via[regno];
19796
19797 if (label != NULL)
19798 {
19799 switch_to_section (function_section (current_function_decl));
19800 targetm.asm_out.internal_label (asm_out_file, "L",
19801 CODE_LABEL_NUMBER (label));
19802 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19803 }
19804 }
19805
19806 /* ??? Probably not safe to set this here, since it assumes that a
19807 function will be emitted as assembly immediately after we generate
19808 RTL for it. This does not happen for inline functions. */
19809 cfun->machine->return_used_this_function = 0;
19810 }
19811 else /* TARGET_32BIT */
19812 {
19813 /* We need to take into account any stack-frame rounding. */
19814 offsets = arm_get_frame_offsets ();
19815
19816 gcc_assert (!use_return_insn (FALSE, NULL)
19817 || (cfun->machine->return_used_this_function != 0)
19818 || offsets->saved_regs == offsets->outgoing_args
19819 || frame_pointer_needed);
19820 }
19821 }
19822
19823 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19824 STR and STRD. If an even number of registers are being pushed, one
19825 or more STRD patterns are created for each register pair. If an
19826 odd number of registers are pushed, emit an initial STR followed by
19827 as many STRD instructions as are needed. This works best when the
19828 stack is initially 64-bit aligned (the normal case), since it
19829 ensures that each STRD is also 64-bit aligned. */
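/* For illustration only (a sketch of the expected output, not verbatim
   compiler output): with SAVED_REGS_MASK covering {r4, r5, r6} the emitted
   sequence is roughly

       str   r4, [sp, #-12]!
       strd  r5, r6, [sp, #4]

   and with {r4, r5, r6, r7} it is roughly

       strd  r4, r5, [sp, #-16]!
       strd  r6, r7, [sp, #8]  */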
19830 static void
19831 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19832 {
19833 int num_regs = 0;
19834 int i;
19835 int regno;
19836 rtx par = NULL_RTX;
19837 rtx dwarf = NULL_RTX;
19838 rtx tmp;
19839 bool first = true;
19840
19841 num_regs = bit_count (saved_regs_mask);
19842
19843 /* Must be at least one register to save, and can't save SP or PC. */
19844 gcc_assert (num_regs > 0 && num_regs <= 14);
19845 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19846 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19847
19848 /* Create sequence for DWARF info. All the frame-related data for
19849 debugging is held in this wrapper. */
19850 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19851
19852 /* Describe the stack adjustment. */
19853 tmp = gen_rtx_SET (stack_pointer_rtx,
19854 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19855 RTX_FRAME_RELATED_P (tmp) = 1;
19856 XVECEXP (dwarf, 0, 0) = tmp;
19857
19858 /* Find the first register. */
19859 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19860 ;
19861
19862 i = 0;
19863
19864 /* If there's an odd number of registers to push, start off by
19865 pushing a single register. This ensures that subsequent strd
19866 operations are dword aligned (assuming that SP was originally
19867 64-bit aligned). */
19868 if ((num_regs & 1) != 0)
19869 {
19870 rtx reg, mem, insn;
19871
19872 reg = gen_rtx_REG (SImode, regno);
19873 if (num_regs == 1)
19874 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19875 stack_pointer_rtx));
19876 else
19877 mem = gen_frame_mem (Pmode,
19878 gen_rtx_PRE_MODIFY
19879 (Pmode, stack_pointer_rtx,
19880 plus_constant (Pmode, stack_pointer_rtx,
19881 -4 * num_regs)));
19882
19883 tmp = gen_rtx_SET (mem, reg);
19884 RTX_FRAME_RELATED_P (tmp) = 1;
19885 insn = emit_insn (tmp);
19886 RTX_FRAME_RELATED_P (insn) = 1;
19887 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19888 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19889 RTX_FRAME_RELATED_P (tmp) = 1;
19890 i++;
19891 regno++;
19892 XVECEXP (dwarf, 0, i) = tmp;
19893 first = false;
19894 }
19895
19896 while (i < num_regs)
19897 if (saved_regs_mask & (1 << regno))
19898 {
19899 rtx reg1, reg2, mem1, mem2;
19900 rtx tmp0, tmp1, tmp2;
19901 int regno2;
19902
19903 /* Find the register to pair with this one. */
19904 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19905 regno2++)
19906 ;
19907
19908 reg1 = gen_rtx_REG (SImode, regno);
19909 reg2 = gen_rtx_REG (SImode, regno2);
19910
19911 if (first)
19912 {
19913 rtx insn;
19914
19915 first = false;
19916 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 -4 * num_regs));
19919 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19920 stack_pointer_rtx,
19921 -4 * (num_regs - 1)));
19922 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19923 plus_constant (Pmode, stack_pointer_rtx,
19924 -4 * (num_regs)));
19925 tmp1 = gen_rtx_SET (mem1, reg1);
19926 tmp2 = gen_rtx_SET (mem2, reg2);
19927 RTX_FRAME_RELATED_P (tmp0) = 1;
19928 RTX_FRAME_RELATED_P (tmp1) = 1;
19929 RTX_FRAME_RELATED_P (tmp2) = 1;
19930 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19931 XVECEXP (par, 0, 0) = tmp0;
19932 XVECEXP (par, 0, 1) = tmp1;
19933 XVECEXP (par, 0, 2) = tmp2;
19934 insn = emit_insn (par);
19935 RTX_FRAME_RELATED_P (insn) = 1;
19936 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19937 }
19938 else
19939 {
19940 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * i));
19943 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19944 stack_pointer_rtx,
19945 4 * (i + 1)));
19946 tmp1 = gen_rtx_SET (mem1, reg1);
19947 tmp2 = gen_rtx_SET (mem2, reg2);
19948 RTX_FRAME_RELATED_P (tmp1) = 1;
19949 RTX_FRAME_RELATED_P (tmp2) = 1;
19950 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19951 XVECEXP (par, 0, 0) = tmp1;
19952 XVECEXP (par, 0, 1) = tmp2;
19953 emit_insn (par);
19954 }
19955
19956 /* Create unwind information. This is an approximation. */
19957 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19958 plus_constant (Pmode,
19959 stack_pointer_rtx,
19960 4 * i)),
19961 reg1);
19962 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19963 plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 4 * (i + 1))),
19966 reg2);
19967
19968 RTX_FRAME_RELATED_P (tmp1) = 1;
19969 RTX_FRAME_RELATED_P (tmp2) = 1;
19970 XVECEXP (dwarf, 0, i + 1) = tmp1;
19971 XVECEXP (dwarf, 0, i + 2) = tmp2;
19972 i += 2;
19973 regno = regno2 + 1;
19974 }
19975 else
19976 regno++;
19977
19978 return;
19979 }
19980
19981 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19982 whenever possible, otherwise it emits single-word stores. The first store
19983 also allocates stack space for all saved registers, using writeback with
19984 post-addressing mode. All other stores use offset addressing. If no STRD
19985 can be emitted, this function emits a sequence of single-word stores,
19986 and not an STM as before, because single-word stores give the scheduler more
19987 freedom and can be turned into an STM by peephole optimizations. */
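/* For illustration only (a sketch, not verbatim compiler output): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7, r8} the expected sequence is
   roughly

       strd  r4, r5, [sp, #-20]!
       strd  r6, r7, [sp, #8]
       str   r8, [sp, #16]  */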
19988 static void
19989 arm_emit_strd_push (unsigned long saved_regs_mask)
19990 {
19991 int num_regs = 0;
19992 int i, j, dwarf_index = 0;
19993 int offset = 0;
19994 rtx dwarf = NULL_RTX;
19995 rtx insn = NULL_RTX;
19996 rtx tmp, mem;
19997
19998 /* TODO: More efficient code can be emitted by changing the
19999 layout, e.g., first push all pairs that can use STRD to keep the
20000 stack aligned, and then push all other registers. */
20001 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20002 if (saved_regs_mask & (1 << i))
20003 num_regs++;
20004
20005 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20006 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20007 gcc_assert (num_regs > 0);
20008
20009 /* Create sequence for DWARF info. */
20010 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20011
20012 /* For dwarf info, we generate explicit stack update. */
20013 tmp = gen_rtx_SET (stack_pointer_rtx,
20014 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20015 RTX_FRAME_RELATED_P (tmp) = 1;
20016 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20017
20018 /* Save registers. */
20019 offset = - 4 * num_regs;
20020 j = 0;
20021 while (j <= LAST_ARM_REGNUM)
20022 if (saved_regs_mask & (1 << j))
20023 {
20024 if ((j % 2 == 0)
20025 && (saved_regs_mask & (1 << (j + 1))))
20026 {
20027 /* Current register and next register form a register pair for
20028 which STRD can be generated. */
20029 if (offset < 0)
20030 {
20031 /* Allocate stack space for all saved registers. */
20032 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20033 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20034 mem = gen_frame_mem (DImode, tmp);
20035 offset = 0;
20036 }
20037 else if (offset > 0)
20038 mem = gen_frame_mem (DImode,
20039 plus_constant (Pmode,
20040 stack_pointer_rtx,
20041 offset));
20042 else
20043 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20044
20045 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20046 RTX_FRAME_RELATED_P (tmp) = 1;
20047 tmp = emit_insn (tmp);
20048
20049 /* Record the first store insn. */
20050 if (dwarf_index == 1)
20051 insn = tmp;
20052
20053 /* Generate dwarf info. */
20054 mem = gen_frame_mem (SImode,
20055 plus_constant (Pmode,
20056 stack_pointer_rtx,
20057 offset));
20058 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20059 RTX_FRAME_RELATED_P (tmp) = 1;
20060 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20061
20062 mem = gen_frame_mem (SImode,
20063 plus_constant (Pmode,
20064 stack_pointer_rtx,
20065 offset + 4));
20066 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20067 RTX_FRAME_RELATED_P (tmp) = 1;
20068 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20069
20070 offset += 8;
20071 j += 2;
20072 }
20073 else
20074 {
20075 /* Emit a single word store. */
20076 if (offset < 0)
20077 {
20078 /* Allocate stack space for all saved registers. */
20079 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20080 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20081 mem = gen_frame_mem (SImode, tmp);
20082 offset = 0;
20083 }
20084 else if (offset > 0)
20085 mem = gen_frame_mem (SImode,
20086 plus_constant (Pmode,
20087 stack_pointer_rtx,
20088 offset));
20089 else
20090 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20091
20092 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20093 RTX_FRAME_RELATED_P (tmp) = 1;
20094 tmp = emit_insn (tmp);
20095
20096 /* Record the first store insn. */
20097 if (dwarf_index == 1)
20098 insn = tmp;
20099
20100 /* Generate dwarf info. */
20101 mem = gen_frame_mem (SImode,
20102 plus_constant (Pmode,
20103 stack_pointer_rtx,
20104 offset));
20105 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20107 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20108
20109 offset += 4;
20110 j += 1;
20111 }
20112 }
20113 else
20114 j++;
20115
20116 /* Attach dwarf info to the first insn we generate. */
20117 gcc_assert (insn != NULL_RTX);
20118 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20119 RTX_FRAME_RELATED_P (insn) = 1;
20120 }
20121
20122 /* Generate and emit an insn that we will recognize as a push_multi.
20123 Unfortunately, since this insn does not reflect very well the actual
20124 semantics of the operation, we need to annotate the insn for the benefit
20125 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20126 MASK for registers that should be annotated for DWARF2 frame unwind
20127 information. */
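/* For illustration only: a call such as emit_multi_reg_push (mask, mask)
   with MASK covering {r4, r5, lr} is expected to assemble to roughly

       push  {r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the equivalent
   explicit stores and the single stack decrement, as detailed below.  */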
20128 static rtx
20129 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20130 {
20131 int num_regs = 0;
20132 int num_dwarf_regs = 0;
20133 int i, j;
20134 rtx par;
20135 rtx dwarf;
20136 int dwarf_par_index;
20137 rtx tmp, reg;
20138
20139 /* We don't record the PC in the dwarf frame information. */
20140 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20141
20142 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20143 {
20144 if (mask & (1 << i))
20145 num_regs++;
20146 if (dwarf_regs_mask & (1 << i))
20147 num_dwarf_regs++;
20148 }
20149
20150 gcc_assert (num_regs && num_regs <= 16);
20151 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20152
20153 /* For the body of the insn we are going to generate an UNSPEC in
20154 parallel with several USEs. This allows the insn to be recognized
20155 by the push_multi pattern in the arm.md file.
20156
20157 The body of the insn looks something like this:
20158
20159 (parallel [
20160 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20161 (const_int:SI <num>)))
20162 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20163 (use (reg:SI XX))
20164 (use (reg:SI YY))
20165 ...
20166 ])
20167
20168 For the frame note however, we try to be more explicit and actually
20169 show each register being stored into the stack frame, plus a (single)
20170 decrement of the stack pointer. We do it this way in order to be
20171 friendly to the stack unwinding code, which only wants to see a single
20172 stack decrement per instruction. The RTL we generate for the note looks
20173 something like this:
20174
20175 (sequence [
20176 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20177 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20178 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20179 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20180 ...
20181 ])
20182
20183 FIXME: In an ideal world the PRE_MODIFY would not exist and
20184 instead we'd have a parallel expression detailing all
20185 the stores to the various memory addresses so that debug
20186 information is more up-to-date. Remember however while writing
20187 this to take care of the constraints with the push instruction.
20188
20189 Note also that this has to be taken care of for the VFP registers.
20190
20191 For more see PR43399. */
20192
20193 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20194 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20195 dwarf_par_index = 1;
20196
20197 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20198 {
20199 if (mask & (1 << i))
20200 {
20201 reg = gen_rtx_REG (SImode, i);
20202
20203 XVECEXP (par, 0, 0)
20204 = gen_rtx_SET (gen_frame_mem
20205 (BLKmode,
20206 gen_rtx_PRE_MODIFY (Pmode,
20207 stack_pointer_rtx,
20208 plus_constant
20209 (Pmode, stack_pointer_rtx,
20210 -4 * num_regs))
20211 ),
20212 gen_rtx_UNSPEC (BLKmode,
20213 gen_rtvec (1, reg),
20214 UNSPEC_PUSH_MULT));
20215
20216 if (dwarf_regs_mask & (1 << i))
20217 {
20218 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20219 reg);
20220 RTX_FRAME_RELATED_P (tmp) = 1;
20221 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20222 }
20223
20224 break;
20225 }
20226 }
20227
20228 for (j = 1, i++; j < num_regs; i++)
20229 {
20230 if (mask & (1 << i))
20231 {
20232 reg = gen_rtx_REG (SImode, i);
20233
20234 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20235
20236 if (dwarf_regs_mask & (1 << i))
20237 {
20238 tmp
20239 = gen_rtx_SET (gen_frame_mem
20240 (SImode,
20241 plus_constant (Pmode, stack_pointer_rtx,
20242 4 * j)),
20243 reg);
20244 RTX_FRAME_RELATED_P (tmp) = 1;
20245 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20246 }
20247
20248 j++;
20249 }
20250 }
20251
20252 par = emit_insn (par);
20253
20254 tmp = gen_rtx_SET (stack_pointer_rtx,
20255 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20256 RTX_FRAME_RELATED_P (tmp) = 1;
20257 XVECEXP (dwarf, 0, 0) = tmp;
20258
20259 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20260
20261 return par;
20262 }
20263
20264 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20265 SIZE is the offset to be adjusted.
20266 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20267 static void
20268 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20269 {
20270 rtx dwarf;
20271
20272 RTX_FRAME_RELATED_P (insn) = 1;
20273 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20274 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20275 }
20276
20277 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20278 SAVED_REGS_MASK shows which registers need to be restored.
20279
20280 Unfortunately, since this insn does not reflect very well the actual
20281 semantics of the operation, we need to annotate the insn for the benefit
20282 of DWARF2 frame unwind information. */
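/* For illustration only (a sketch): with SAVED_REGS_MASK covering
   {r4, r5, pc} the expected result is roughly

       pop   {r4, r5, pc}

   i.e. a single load-multiple that both restores the registers and
   returns; the REG_CFA_RESTORE notes built below keep the unwinder in
   step with it.  */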
20283 static void
20284 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20285 {
20286 int num_regs = 0;
20287 int i, j;
20288 rtx par;
20289 rtx dwarf = NULL_RTX;
20290 rtx tmp, reg;
20291 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20292 int offset_adj;
20293 int emit_update;
20294
20295 offset_adj = return_in_pc ? 1 : 0;
20296 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20297 if (saved_regs_mask & (1 << i))
20298 num_regs++;
20299
20300 gcc_assert (num_regs && num_regs <= 16);
20301
20302 /* If SP is in reglist, then we don't emit SP update insn. */
20303 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20304
20305 /* The parallel needs to hold num_regs SETs
20306 and one SET for the stack update. */
20307 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20308
20309 if (return_in_pc)
20310 XVECEXP (par, 0, 0) = ret_rtx;
20311
20312 if (emit_update)
20313 {
20314 /* Increment the stack pointer, based on there being
20315 num_regs 4-byte registers to restore. */
20316 tmp = gen_rtx_SET (stack_pointer_rtx,
20317 plus_constant (Pmode,
20318 stack_pointer_rtx,
20319 4 * num_regs));
20320 RTX_FRAME_RELATED_P (tmp) = 1;
20321 XVECEXP (par, 0, offset_adj) = tmp;
20322 }
20323
20324 /* Now restore every reg, which may include PC. */
20325 for (j = 0, i = 0; j < num_regs; i++)
20326 if (saved_regs_mask & (1 << i))
20327 {
20328 reg = gen_rtx_REG (SImode, i);
20329 if ((num_regs == 1) && emit_update && !return_in_pc)
20330 {
20331 /* Emit single load with writeback. */
20332 tmp = gen_frame_mem (SImode,
20333 gen_rtx_POST_INC (Pmode,
20334 stack_pointer_rtx));
20335 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20336 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20337 return;
20338 }
20339
20340 tmp = gen_rtx_SET (reg,
20341 gen_frame_mem
20342 (SImode,
20343 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20344 RTX_FRAME_RELATED_P (tmp) = 1;
20345 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20346
20347 /* We need to maintain a sequence for DWARF info too. As dwarf info
20348 should not have PC, skip PC. */
20349 if (i != PC_REGNUM)
20350 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20351
20352 j++;
20353 }
20354
20355 if (return_in_pc)
20356 par = emit_jump_insn (par);
20357 else
20358 par = emit_insn (par);
20359
20360 REG_NOTES (par) = dwarf;
20361 if (!return_in_pc)
20362 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20363 stack_pointer_rtx, stack_pointer_rtx);
20364 }
20365
20366 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20367 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20368
20369 Unfortunately, since this insn does not reflect very well the actual
20370 semantics of the operation, we need to annotate the insn for the benefit
20371 of DWARF2 frame unwind information. */
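/* For illustration only: restoring four consecutive D registers starting
   at d8, with BASE_REG being the stack pointer, is expected to assemble to
   roughly

       vldmia  sp!, {d8-d11}

   (a sketch; the exact mnemonic depends on the assembler syntax in use).  */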
20372 static void
20373 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20374 {
20375 int i, j;
20376 rtx par;
20377 rtx dwarf = NULL_RTX;
20378 rtx tmp, reg;
20379
20380 gcc_assert (num_regs && num_regs <= 32);
20381
20382 /* Workaround ARM10 VFPr1 bug. */
20383 if (num_regs == 2 && !arm_arch6)
20384 {
20385 if (first_reg == 15)
20386 first_reg--;
20387
20388 num_regs++;
20389 }
20390
20391 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20392 there could be up to 32 D-registers to restore.
20393 If there are more than 16 D-registers, make two recursive calls,
20394 each of which emits one pop_multi instruction. */
20395 if (num_regs > 16)
20396 {
20397 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20398 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20399 return;
20400 }
20401
20402 /* The parallel needs to hold num_regs SETs
20403 and one SET for the stack update. */
20404 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20405
20406 /* Increment the stack pointer, based on there being
20407 num_regs 8-byte registers to restore. */
20408 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20409 RTX_FRAME_RELATED_P (tmp) = 1;
20410 XVECEXP (par, 0, 0) = tmp;
20411
20412 /* Now show every reg that will be restored, using a SET for each. */
20413 for (j = 0, i = first_reg; j < num_regs; i += 2)
20414 {
20415 reg = gen_rtx_REG (DFmode, i);
20416
20417 tmp = gen_rtx_SET (reg,
20418 gen_frame_mem
20419 (DFmode,
20420 plus_constant (Pmode, base_reg, 8 * j)));
20421 RTX_FRAME_RELATED_P (tmp) = 1;
20422 XVECEXP (par, 0, j + 1) = tmp;
20423
20424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20425
20426 j++;
20427 }
20428
20429 par = emit_insn (par);
20430 REG_NOTES (par) = dwarf;
20431
20432 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20433 if (REGNO (base_reg) == IP_REGNUM)
20434 {
20435 RTX_FRAME_RELATED_P (par) = 1;
20436 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20437 }
20438 else
20439 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20440 base_reg, base_reg);
20441 }
20442
20443 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20444 even number of registers is being popped, multiple LDRD patterns are created
20445 for all register pairs. If an odd number of registers is popped, the last
20446 register is loaded using an LDR pattern. */
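/* For illustration only (a sketch): with SAVED_REGS_MASK covering
   {r4, r5, r6} and no return in PC, the expected sequence is roughly

       ldrd  r4, r5, [sp]
       add   sp, sp, #8
       ldr   r6, [sp], #4

   Note that Thumb-2 LDRD does not require the two destination registers
   to be consecutive.  */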
20447 static void
20448 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20449 {
20450 int num_regs = 0;
20451 int i, j;
20452 rtx par = NULL_RTX;
20453 rtx dwarf = NULL_RTX;
20454 rtx tmp, reg, tmp1;
20455 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20456
20457 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20458 if (saved_regs_mask & (1 << i))
20459 num_regs++;
20460
20461 gcc_assert (num_regs && num_regs <= 16);
20462
20463 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20464 to be popped. So, if num_regs is even, now it will become odd,
20465 and we can generate pop with PC. If num_regs is odd, it will be
20466 even now, and ldr with return can be generated for PC. */
20467 if (return_in_pc)
20468 num_regs--;
20469
20470 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20471
20472 /* Var j iterates over all the registers to gather all the registers in
20473 saved_regs_mask. Var i gives index of saved registers in stack frame.
20474 A PARALLEL RTX of register-pair is created here, so that pattern for
20475 LDRD can be matched. As PC is always last register to be popped, and
20476 we have already decremented num_regs if PC, we don't have to worry
20477 about PC in this loop. */
20478 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20479 if (saved_regs_mask & (1 << j))
20480 {
20481 /* Create RTX for memory load. */
20482 reg = gen_rtx_REG (SImode, j);
20483 tmp = gen_rtx_SET (reg,
20484 gen_frame_mem (SImode,
20485 plus_constant (Pmode,
20486 stack_pointer_rtx, 4 * i)));
20487 RTX_FRAME_RELATED_P (tmp) = 1;
20488
20489 if (i % 2 == 0)
20490 {
20491 /* When saved-register index (i) is even, the RTX to be emitted is
20492 yet to be created. Hence create it first. The LDRD pattern we
20493 are generating is :
20494 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20495 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20496 where target registers need not be consecutive. */
20497 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20498 dwarf = NULL_RTX;
20499 }
20500
20501 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20502 added as 0th element and if i is odd, reg_i is added as 1st element
20503 of LDRD pattern shown above. */
20504 XVECEXP (par, 0, (i % 2)) = tmp;
20505 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20506
20507 if ((i % 2) == 1)
20508 {
20509 /* When saved-register index (i) is odd, RTXs for both the registers
20510 to be loaded are generated in above given LDRD pattern, and the
20511 pattern can be emitted now. */
20512 par = emit_insn (par);
20513 REG_NOTES (par) = dwarf;
20514 RTX_FRAME_RELATED_P (par) = 1;
20515 }
20516
20517 i++;
20518 }
20519
20520 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20521 the number of registers is even AND return_in_pc is true, the last register
20522 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20523 and then LDR with post increment. */
20524
20525 /* Increment the stack pointer, based on there being
20526 num_regs 4-byte registers to restore. */
20527 tmp = gen_rtx_SET (stack_pointer_rtx,
20528 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20529 RTX_FRAME_RELATED_P (tmp) = 1;
20530 tmp = emit_insn (tmp);
20531 if (!return_in_pc)
20532 {
20533 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20534 stack_pointer_rtx, stack_pointer_rtx);
20535 }
20536
20537 dwarf = NULL_RTX;
20538
20539 if (((num_regs % 2) == 1 && !return_in_pc)
20540 || ((num_regs % 2) == 0 && return_in_pc))
20541 {
20542 /* Scan for the single register to be popped. Skip until the saved
20543 register is found. */
20544 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20545
20546 /* Gen LDR with post increment here. */
20547 tmp1 = gen_rtx_MEM (SImode,
20548 gen_rtx_POST_INC (SImode,
20549 stack_pointer_rtx));
20550 set_mem_alias_set (tmp1, get_frame_alias_set ());
20551
20552 reg = gen_rtx_REG (SImode, j);
20553 tmp = gen_rtx_SET (reg, tmp1);
20554 RTX_FRAME_RELATED_P (tmp) = 1;
20555 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20556
20557 if (return_in_pc)
20558 {
20559 /* If return_in_pc, j must be PC_REGNUM. */
20560 gcc_assert (j == PC_REGNUM);
20561 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20562 XVECEXP (par, 0, 0) = ret_rtx;
20563 XVECEXP (par, 0, 1) = tmp;
20564 par = emit_jump_insn (par);
20565 }
20566 else
20567 {
20568 par = emit_insn (tmp);
20569 REG_NOTES (par) = dwarf;
20570 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20571 stack_pointer_rtx, stack_pointer_rtx);
20572 }
20573
20574 }
20575 else if ((num_regs % 2) == 1 && return_in_pc)
20576 {
20577 /* There are 2 registers to be popped. So, generate the pattern
20578 pop_multiple_with_stack_update_and_return to pop in PC. */
20579 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20580 }
20581
20582 return;
20583 }
20584
20585 /* LDRD in ARM mode needs consecutive registers as operands. This function
20586 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20587 offset addressing and then generates one separate stack update. This provides
20588 more scheduling freedom, compared to writeback on every load. However,
20589 if the function returns using load into PC directly
20590 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20591 before the last load. TODO: Add a peephole optimization to recognize
20592 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20593 peephole optimization to merge the load at stack-offset zero
20594 with the stack update instruction using load with writeback
20595 in post-index addressing mode. */
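/* For illustration only (a sketch): with SAVED_REGS_MASK covering
   {r4, r5, r6, pc} the expected sequence is roughly

       ldrd  r4, r5, [sp]
       ldr   r6, [sp, #8]
       add   sp, sp, #12
       ldr   pc, [sp], #4  */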
20596 static void
20597 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20598 {
20599 int j = 0;
20600 int offset = 0;
20601 rtx par = NULL_RTX;
20602 rtx dwarf = NULL_RTX;
20603 rtx tmp, mem;
20604
20605 /* Restore saved registers. */
20606 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20607 j = 0;
20608 while (j <= LAST_ARM_REGNUM)
20609 if (saved_regs_mask & (1 << j))
20610 {
20611 if ((j % 2) == 0
20612 && (saved_regs_mask & (1 << (j + 1)))
20613 && (j + 1) != PC_REGNUM)
20614 {
20615 /* Current register and next register form register pair for which
20616 LDRD can be generated. PC is always the last register popped, and
20617 we handle it separately. */
20618 if (offset > 0)
20619 mem = gen_frame_mem (DImode,
20620 plus_constant (Pmode,
20621 stack_pointer_rtx,
20622 offset));
20623 else
20624 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20625
20626 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20627 tmp = emit_insn (tmp);
20628 RTX_FRAME_RELATED_P (tmp) = 1;
20629
20630 /* Generate dwarf info. */
20631
20632 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20633 gen_rtx_REG (SImode, j),
20634 NULL_RTX);
20635 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20636 gen_rtx_REG (SImode, j + 1),
20637 dwarf);
20638
20639 REG_NOTES (tmp) = dwarf;
20640
20641 offset += 8;
20642 j += 2;
20643 }
20644 else if (j != PC_REGNUM)
20645 {
20646 /* Emit a single word load. */
20647 if (offset > 0)
20648 mem = gen_frame_mem (SImode,
20649 plus_constant (Pmode,
20650 stack_pointer_rtx,
20651 offset));
20652 else
20653 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20654
20655 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20656 tmp = emit_insn (tmp);
20657 RTX_FRAME_RELATED_P (tmp) = 1;
20658
20659 /* Generate dwarf info. */
20660 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20661 gen_rtx_REG (SImode, j),
20662 NULL_RTX);
20663
20664 offset += 4;
20665 j += 1;
20666 }
20667 else /* j == PC_REGNUM */
20668 j++;
20669 }
20670 else
20671 j++;
20672
20673 /* Update the stack. */
20674 if (offset > 0)
20675 {
20676 tmp = gen_rtx_SET (stack_pointer_rtx,
20677 plus_constant (Pmode,
20678 stack_pointer_rtx,
20679 offset));
20680 tmp = emit_insn (tmp);
20681 arm_add_cfa_adjust_cfa_note (tmp, offset,
20682 stack_pointer_rtx, stack_pointer_rtx);
20683 offset = 0;
20684 }
20685
20686 if (saved_regs_mask & (1 << PC_REGNUM))
20687 {
20688 /* Only PC is to be popped. */
20689 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20690 XVECEXP (par, 0, 0) = ret_rtx;
20691 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20692 gen_frame_mem (SImode,
20693 gen_rtx_POST_INC (SImode,
20694 stack_pointer_rtx)));
20695 RTX_FRAME_RELATED_P (tmp) = 1;
20696 XVECEXP (par, 0, 1) = tmp;
20697 par = emit_jump_insn (par);
20698
20699 /* Generate dwarf info. */
20700 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20701 gen_rtx_REG (SImode, PC_REGNUM),
20702 NULL_RTX);
20703 REG_NOTES (par) = dwarf;
20704 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20705 stack_pointer_rtx, stack_pointer_rtx);
20706 }
20707 }
20708
20709 /* Calculate the size of the return value that is passed in registers. */
20710 static unsigned
20711 arm_size_return_regs (void)
20712 {
20713 machine_mode mode;
20714
20715 if (crtl->return_rtx != 0)
20716 mode = GET_MODE (crtl->return_rtx);
20717 else
20718 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20719
20720 return GET_MODE_SIZE (mode);
20721 }
20722
20723 /* Return true if the current function needs to save/restore LR. */
20724 static bool
20725 thumb_force_lr_save (void)
20726 {
20727 return !cfun->machine->lr_save_eliminated
20728 && (!crtl->is_leaf
20729 || thumb_far_jump_used_p ()
20730 || df_regs_ever_live_p (LR_REGNUM));
20731 }
20732
20733 /* Return true if CALL is an indirect tail call. In that case we
20734 do not know whether r3 will be available at the call site. */
20736 static bool
20737 is_indirect_tailcall_p (rtx call)
20738 {
20739 rtx pat = PATTERN (call);
20740
20741 /* Indirect tail call. */
20742 pat = XVECEXP (pat, 0, 0);
20743 if (GET_CODE (pat) == SET)
20744 pat = SET_SRC (pat);
20745
20746 pat = XEXP (XEXP (pat, 0), 0);
20747 return REG_P (pat);
20748 }
20749
20750 /* Return true if r3 is used by any of the tail call insns in the
20751 current function. */
20752 static bool
20753 any_sibcall_could_use_r3 (void)
20754 {
20755 edge_iterator ei;
20756 edge e;
20757
20758 if (!crtl->tail_call_emit)
20759 return false;
20760 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20761 if (e->flags & EDGE_SIBCALL)
20762 {
20763 rtx_insn *call = BB_END (e->src);
20764 if (!CALL_P (call))
20765 call = prev_nonnote_nondebug_insn (call);
20766 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20767 if (find_regno_fusage (call, USE, 3)
20768 || is_indirect_tailcall_p (call))
20769 return true;
20770 }
20771 return false;
20772 }
20773
20774
20775 /* Compute the distance from register FROM to register TO.
20776 These can be the arg pointer (26), the soft frame pointer (25),
20777 the stack pointer (13) or the hard frame pointer (11).
20778 In thumb mode r7 is used as the soft frame pointer, if needed.
20779 Typical stack layout looks like this:
20780
20781 old stack pointer -> | |
20782 ----
20783 | | \
20784 | | saved arguments for
20785 | | vararg functions
20786 | | /
20787 --
20788 hard FP & arg pointer -> | | \
20789 | | stack
20790 | | frame
20791 | | /
20792 --
20793 | | \
20794 | | call saved
20795 | | registers
20796 soft frame pointer -> | | /
20797 --
20798 | | \
20799 | | local
20800 | | variables
20801 locals base pointer -> | | /
20802 --
20803 | | \
20804 | | outgoing
20805 | | arguments
20806 current stack pointer -> | | /
20807 --
20808
20809 For a given function some or all of these stack components
20810 may not be needed, giving rise to the possibility of
20811 eliminating some of the registers.
20812
20813 The values returned by this function must reflect the behavior
20814 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20815
20816 The sign of the number returned reflects the direction of stack
20817 growth, so the values are positive for all eliminations except
20818 from the soft frame pointer to the hard frame pointer.
20819
20820 SFP may point just inside the local variables block to ensure correct
20821 alignment. */
20822
20823
20824 /* Return cached stack offsets. */
20825
20826 static arm_stack_offsets *
20827 arm_get_frame_offsets (void)
20828 {
20829 struct arm_stack_offsets *offsets;
20830
20831 offsets = &cfun->machine->stack_offsets;
20832
20833 return offsets;
20834 }
20835
20836
20837 /* Calculate stack offsets. These are used to calculate register elimination
20838 offsets and in prologue/epilogue code. Also calculates which registers
20839 should be saved. */
20840
20841 static void
20842 arm_compute_frame_layout (void)
20843 {
20844 struct arm_stack_offsets *offsets;
20845 unsigned long func_type;
20846 int saved;
20847 int core_saved;
20848 HOST_WIDE_INT frame_size;
20849 int i;
20850
20851 offsets = &cfun->machine->stack_offsets;
20852
20853 /* Initially this is the size of the local variables. It will be translated
20854 into an offset once we have determined the size of preceding data. */
20855 frame_size = ROUND_UP_WORD (get_frame_size ());
20856
20857 /* Space for variadic functions. */
20858 offsets->saved_args = crtl->args.pretend_args_size;
20859
20860 /* In Thumb mode this is incorrect, but never used. */
20861 offsets->frame
20862 = (offsets->saved_args
20863 + arm_compute_static_chain_stack_bytes ()
20864 + (frame_pointer_needed ? 4 : 0));
20865
20866 if (TARGET_32BIT)
20867 {
20868 unsigned int regno;
20869
20870 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20871 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20872 saved = core_saved;
20873
20874 /* We know that SP will be doubleword aligned on entry, and we must
20875 preserve that condition at any subroutine call. We also require the
20876 soft frame pointer to be doubleword aligned. */
20877
20878 if (TARGET_REALLY_IWMMXT)
20879 {
20880 /* Check for the call-saved iWMMXt registers. */
20881 for (regno = FIRST_IWMMXT_REGNUM;
20882 regno <= LAST_IWMMXT_REGNUM;
20883 regno++)
20884 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20885 saved += 8;
20886 }
20887
20888 func_type = arm_current_func_type ();
20889 /* Space for saved VFP registers. */
20890 if (! IS_VOLATILE (func_type)
20891 && TARGET_HARD_FLOAT)
20892 saved += arm_get_vfp_saved_size ();
20893 }
20894 else /* TARGET_THUMB1 */
20895 {
20896 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20897 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20898 saved = core_saved;
20899 if (TARGET_BACKTRACE)
20900 saved += 16;
20901 }
20902
20903 /* Saved registers include the stack frame. */
20904 offsets->saved_regs
20905 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20906 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20907
20908 /* A leaf function does not need any stack alignment if it has nothing
20909 on the stack. */
20910 if (crtl->is_leaf && frame_size == 0
20911 /* However if it calls alloca(), we have a dynamically allocated
20912 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20913 && ! cfun->calls_alloca)
20914 {
20915 offsets->outgoing_args = offsets->soft_frame;
20916 offsets->locals_base = offsets->soft_frame;
20917 return;
20918 }
20919
20920 /* Ensure SFP has the correct alignment. */
20921 if (ARM_DOUBLEWORD_ALIGN
20922 && (offsets->soft_frame & 7))
20923 {
20924 offsets->soft_frame += 4;
20925 /* Try to align stack by pushing an extra reg. Don't bother doing this
20926 when there is a stack frame as the alignment will be rolled into
20927 the normal stack adjustment. */
20928 if (frame_size + crtl->outgoing_args_size == 0)
20929 {
20930 int reg = -1;
20931
20932 /* Register r3 is caller-saved. Normally it does not need to be
20933 saved on entry by the prologue. However if we choose to save
20934 it for padding then we may confuse the compiler into thinking
20935 a prologue sequence is required when in fact it is not. This
20936 will occur when shrink-wrapping if r3 is used as a scratch
20937 register and there are no other callee-saved writes.
20938
20939 This situation can be avoided when other callee-saved registers
20940 are available and r3 is not mandatory if we choose a callee-saved
20941 register for padding. */
20942 bool prefer_callee_reg_p = false;
20943
20944 /* If it is safe to use r3, then do so. This sometimes
20945 generates better code on Thumb-2 by avoiding the need to
20946 use 32-bit push/pop instructions. */
20947 if (! any_sibcall_could_use_r3 ()
20948 && arm_size_return_regs () <= 12
20949 && (offsets->saved_regs_mask & (1 << 3)) == 0
20950 && (TARGET_THUMB2
20951 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20952 {
20953 reg = 3;
20954 if (!TARGET_THUMB2)
20955 prefer_callee_reg_p = true;
20956 }
20957 if (reg == -1
20958 || prefer_callee_reg_p)
20959 {
20960 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20961 {
20962 /* Avoid fixed registers; they may be changed at
20963 arbitrary times so it's unsafe to restore them
20964 during the epilogue. */
20965 if (!fixed_regs[i]
20966 && (offsets->saved_regs_mask & (1 << i)) == 0)
20967 {
20968 reg = i;
20969 break;
20970 }
20971 }
20972 }
20973
20974 if (reg != -1)
20975 {
20976 offsets->saved_regs += 4;
20977 offsets->saved_regs_mask |= (1 << reg);
20978 }
20979 }
20980 }
20981
20982 offsets->locals_base = offsets->soft_frame + frame_size;
20983 offsets->outgoing_args = (offsets->locals_base
20984 + crtl->outgoing_args_size);
20985
20986 if (ARM_DOUBLEWORD_ALIGN)
20987 {
20988 /* Ensure SP remains doubleword aligned. */
20989 if (offsets->outgoing_args & 7)
20990 offsets->outgoing_args += 4;
20991 gcc_assert (!(offsets->outgoing_args & 7));
20992 }
20993 }
20994
20995
20996 /* Calculate the relative offsets for the different stack pointers. Positive
20997 offsets are in the direction of stack growth. */
20998
20999 HOST_WIDE_INT
21000 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21001 {
21002 arm_stack_offsets *offsets;
21003
21004 offsets = arm_get_frame_offsets ();
21005
21006 /* OK, now we have enough information to compute the distances.
21007 There must be an entry in these switch tables for each pair
21008 of registers in ELIMINABLE_REGS, even if some of the entries
21009 seem to be redundant or useless. */
21010 switch (from)
21011 {
21012 case ARG_POINTER_REGNUM:
21013 switch (to)
21014 {
21015 case THUMB_HARD_FRAME_POINTER_REGNUM:
21016 return 0;
21017
21018 case FRAME_POINTER_REGNUM:
21019 /* This is the reverse of the soft frame pointer
21020 to hard frame pointer elimination below. */
21021 return offsets->soft_frame - offsets->saved_args;
21022
21023 case ARM_HARD_FRAME_POINTER_REGNUM:
21024 /* This is only non-zero in the case where the static chain register
21025 is stored above the frame. */
21026 return offsets->frame - offsets->saved_args - 4;
21027
21028 case STACK_POINTER_REGNUM:
21029 /* If nothing has been pushed on the stack at all
21030 then this will return -4. This *is* correct! */
21031 return offsets->outgoing_args - (offsets->saved_args + 4);
21032
21033 default:
21034 gcc_unreachable ();
21035 }
21036 gcc_unreachable ();
21037
21038 case FRAME_POINTER_REGNUM:
21039 switch (to)
21040 {
21041 case THUMB_HARD_FRAME_POINTER_REGNUM:
21042 return 0;
21043
21044 case ARM_HARD_FRAME_POINTER_REGNUM:
21045 /* The hard frame pointer points to the top entry in the
21046 stack frame. The soft frame pointer to the bottom entry
21047 in the stack frame. If there is no stack frame at all,
21048 then they are identical. */
21049
21050 return offsets->frame - offsets->soft_frame;
21051
21052 case STACK_POINTER_REGNUM:
21053 return offsets->outgoing_args - offsets->soft_frame;
21054
21055 default:
21056 gcc_unreachable ();
21057 }
21058 gcc_unreachable ();
21059
21060 default:
21061 /* You cannot eliminate from the stack pointer.
21062 In theory you could eliminate from the hard frame
21063 pointer to the stack pointer, but this will never
21064 happen, since if a stack frame is not needed the
21065 hard frame pointer will never be used. */
21066 gcc_unreachable ();
21067 }
21068 }
21069
21070 /* Given FROM and TO register numbers, say whether this elimination is
21071 allowed. Frame pointer elimination is automatically handled.
21072
21073 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21074 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21075 pointer, we must eliminate FRAME_POINTER_REGNUM into
21076 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21077 ARG_POINTER_REGNUM. */
21078
21079 bool
21080 arm_can_eliminate (const int from, const int to)
21081 {
21082 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21083 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21084 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21085 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21086 true);
21087 }
21088
21089 /* Emit RTL to save coprocessor registers on function entry. Returns the
21090 number of bytes pushed. */
21091
21092 static int
21093 arm_save_coproc_regs (void)
21094 {
21095 int saved_size = 0;
21096 unsigned reg;
21097 unsigned start_reg;
21098 rtx insn;
21099
21100 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21101 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21102 {
21103 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21104 insn = gen_rtx_MEM (V2SImode, insn);
21105 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21106 RTX_FRAME_RELATED_P (insn) = 1;
21107 saved_size += 8;
21108 }
21109
21110 if (TARGET_HARD_FLOAT)
21111 {
21112 start_reg = FIRST_VFP_REGNUM;
21113
21114 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21115 {
21116 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21117 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21118 {
21119 if (start_reg != reg)
21120 saved_size += vfp_emit_fstmd (start_reg,
21121 (reg - start_reg) / 2);
21122 start_reg = reg + 2;
21123 }
21124 }
21125 if (start_reg != reg)
21126 saved_size += vfp_emit_fstmd (start_reg,
21127 (reg - start_reg) / 2);
21128 }
21129 return saved_size;
21130 }
21131
21132
21133 /* Set the Thumb frame pointer from the stack pointer. */
21134
21135 static void
21136 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21137 {
21138 HOST_WIDE_INT amount;
21139 rtx insn, dwarf;
21140
21141 amount = offsets->outgoing_args - offsets->locals_base;
21142 if (amount < 1024)
21143 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21144 stack_pointer_rtx, GEN_INT (amount)));
21145 else
21146 {
21147 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21148 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21149 expects the first two operands to be the same. */
21150 if (TARGET_THUMB2)
21151 {
21152 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21153 stack_pointer_rtx,
21154 hard_frame_pointer_rtx));
21155 }
21156 else
21157 {
21158 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21159 hard_frame_pointer_rtx,
21160 stack_pointer_rtx));
21161 }
21162 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21163 plus_constant (Pmode, stack_pointer_rtx, amount));
21164 RTX_FRAME_RELATED_P (dwarf) = 1;
21165 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21166 }
21167
21168 RTX_FRAME_RELATED_P (insn) = 1;
21169 }
21170
21171 struct scratch_reg {
21172 rtx reg;
21173 bool saved;
21174 };
21175
21176 /* Return a short-lived scratch register for use as a 2nd scratch register on
21177 function entry after the registers are saved in the prologue. This register
21178 must be released by means of release_scratch_register_on_entry. IP is not
21179 considered since it is always used as the 1st scratch register if available.
21180
21181 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21182 mask of live registers. */
21183
21184 static void
21185 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21186 unsigned long live_regs)
21187 {
21188 int regno = -1;
21189
21190 sr->saved = false;
21191
21192 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21193 regno = LR_REGNUM;
21194 else
21195 {
21196 unsigned int i;
21197
21198 for (i = 4; i < 11; i++)
21199 if (regno1 != i && (live_regs & (1 << i)) != 0)
21200 {
21201 regno = i;
21202 break;
21203 }
21204
21205 if (regno < 0)
21206 {
21207 /* If IP is used as the 1st scratch register for a nested function,
21208 then either r3 wasn't available or is used to preserve IP. */
21209 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21210 regno1 = 3;
21211 regno = (regno1 == 3 ? 2 : 3);
21212 sr->saved
21213 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21214 regno);
21215 }
21216 }
21217
21218 sr->reg = gen_rtx_REG (SImode, regno);
21219 if (sr->saved)
21220 {
21221 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21222 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21223 rtx x = gen_rtx_SET (stack_pointer_rtx,
21224 plus_constant (Pmode, stack_pointer_rtx, -4));
21225 RTX_FRAME_RELATED_P (insn) = 1;
21226 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21227 }
21228 }
21229
21230 /* Release a scratch register obtained from the preceding function. */
21231
21232 static void
21233 release_scratch_register_on_entry (struct scratch_reg *sr)
21234 {
21235 if (sr->saved)
21236 {
21237 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21238 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21239 rtx x = gen_rtx_SET (stack_pointer_rtx,
21240 plus_constant (Pmode, stack_pointer_rtx, 4));
21241 RTX_FRAME_RELATED_P (insn) = 1;
21242 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21243 }
21244 }
21245
21246 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21247
21248 #if PROBE_INTERVAL > 4096
21249 #error Cannot use indexed addressing mode for stack probing
21250 #endif
21251
21252 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21253 inclusive. These are offsets from the current stack pointer. REGNO1
21254 is the index number of the 1st scratch register and LIVE_REGS is the
21255 mask of live registers. */
21256
21257 static void
21258 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21259 unsigned int regno1, unsigned long live_regs)
21260 {
21261 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21262
21263 /* See if we have a constant small number of probes to generate. If so,
21264 that's the easy case. */
21265 if (size <= PROBE_INTERVAL)
21266 {
21267 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21268 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21269 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21270 }
21271
21272 /* The run-time loop is made up of 10 insns in the generic case while the
21273 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21274 else if (size <= 5 * PROBE_INTERVAL)
21275 {
21276 HOST_WIDE_INT i, rem;
21277
21278 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21279 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21280 emit_stack_probe (reg1);
21281
21282 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21283 it exceeds SIZE. If only two probes are needed, this will not
21284 generate any code. Then probe at FIRST + SIZE. */
21285 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21286 {
21287 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21288 emit_stack_probe (reg1);
21289 }
21290
21291 rem = size - (i - PROBE_INTERVAL);
21292 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21293 {
21294 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21295 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21296 }
21297 else
21298 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21299 }
21300
21301 /* Otherwise, do the same as above, but in a loop. Note that we must be
21302 extra careful with variables wrapping around because we might be at
21303 the very top (or the very bottom) of the address space and we have
21304 to be able to handle this case properly; in particular, we use an
21305 equality test for the loop condition. */
21306 else
21307 {
21308 HOST_WIDE_INT rounded_size;
21309 struct scratch_reg sr;
21310
21311 get_scratch_register_on_entry (&sr, regno1, live_regs);
21312
21313 emit_move_insn (reg1, GEN_INT (first));
21314
21315
21316 /* Step 1: round SIZE to the previous multiple of the interval. */
21317
21318 rounded_size = size & -PROBE_INTERVAL;
21319 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21320
21321
21322 /* Step 2: compute initial and final value of the loop counter. */
21323
21324 /* TEST_ADDR = SP + FIRST. */
21325 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21326
21327 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21328 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21329
21330
21331 /* Step 3: the loop
21332
21333 do
21334 {
21335 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21336 probe at TEST_ADDR
21337 }
21338 while (TEST_ADDR != LAST_ADDR)
21339
21340 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21341 until it is equal to ROUNDED_SIZE. */
21342
21343 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21344
21345
21346 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21347 that SIZE is equal to ROUNDED_SIZE. */
21348
21349 if (size != rounded_size)
21350 {
21351 HOST_WIDE_INT rem = size - rounded_size;
21352
21353 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21354 {
21355 emit_set_insn (sr.reg,
21356 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21357 emit_stack_probe (plus_constant (Pmode, sr.reg,
21358 PROBE_INTERVAL - rem));
21359 }
21360 else
21361 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21362 }
21363
21364 release_scratch_register_on_entry (&sr);
21365 }
21366
21367 /* Make sure nothing is scheduled before we are done. */
21368 emit_insn (gen_blockage ());
21369 }
21370
21371 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21372 absolute addresses. */
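/* For illustration only: the probe loop printed by this routine looks
   roughly like

   .LPSRL0:
       sub   r4, r4, #4096
       str   r0, [r4, #0]
       cmp   r4, r5
       bne   .LPSRL0

   where r4 and r5 stand for REG1 and REG2 and 4096 for PROBE_INTERVAL
   (a sketch; the actual registers and interval may differ).  */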
21373
21374 const char *
21375 output_probe_stack_range (rtx reg1, rtx reg2)
21376 {
21377 static int labelno = 0;
21378 char loop_lab[32];
21379 rtx xops[2];
21380
21381 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21382
21383 /* Loop. */
21384 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21385
21386 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21387 xops[0] = reg1;
21388 xops[1] = GEN_INT (PROBE_INTERVAL);
21389 output_asm_insn ("sub\t%0, %0, %1", xops);
21390
21391 /* Probe at TEST_ADDR. */
21392 output_asm_insn ("str\tr0, [%0, #0]", xops);
21393
21394 /* Test if TEST_ADDR == LAST_ADDR. */
21395 xops[1] = reg2;
21396 output_asm_insn ("cmp\t%0, %1", xops);
21397
21398 /* Branch. */
21399 fputs ("\tbne\t", asm_out_file);
21400 assemble_name_raw (asm_out_file, loop_lab);
21401 fputc ('\n', asm_out_file);
21402
21403 return "";
21404 }
21405
21406 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21407 function. */
21408 void
21409 arm_expand_prologue (void)
21410 {
21411 rtx amount;
21412 rtx insn;
21413 rtx ip_rtx;
21414 unsigned long live_regs_mask;
21415 unsigned long func_type;
21416 int fp_offset = 0;
21417 int saved_pretend_args = 0;
21418 int saved_regs = 0;
21419 unsigned HOST_WIDE_INT args_to_push;
21420 HOST_WIDE_INT size;
21421 arm_stack_offsets *offsets;
21422 bool clobber_ip;
21423
21424 func_type = arm_current_func_type ();
21425
21426 /* Naked functions don't have prologues. */
21427 if (IS_NAKED (func_type))
21428 {
21429 if (flag_stack_usage_info)
21430 current_function_static_stack_size = 0;
21431 return;
21432 }
21433
21434 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21435 args_to_push = crtl->args.pretend_args_size;
21436
21437 /* Compute which registers we will have to save onto the stack. */
21438 offsets = arm_get_frame_offsets ();
21439 live_regs_mask = offsets->saved_regs_mask;
21440
21441 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21442
21443 if (IS_STACKALIGN (func_type))
21444 {
21445 rtx r0, r1;
21446
21447 /* Handle a word-aligned stack pointer. We generate the following:
21448
21449 mov r0, sp
21450 bic r1, r0, #7
21451 mov sp, r1
21452 <save and restore r0 in normal prologue/epilogue>
21453 mov sp, r0
21454 bx lr
21455
21456 The unwinder doesn't need to know about the stack realignment.
21457 Just tell it we saved SP in r0. */
21458 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21459
21460 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21461 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21462
21463 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21464 RTX_FRAME_RELATED_P (insn) = 1;
21465 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21466
21467 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21468
21469 /* ??? The CFA changes here, which may cause GDB to conclude that it
21470 has entered a different function. That said, the unwind info is
21471 correct, individually, before and after this instruction because
21472 we've described the save of SP, which will override the default
21473 handling of SP as restoring from the CFA. */
21474 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21475 }
21476
21477 /* The static chain register is the same as the IP register. If it is
21478 clobbered when creating the frame, we need to save and restore it. */
21479 clobber_ip = IS_NESTED (func_type)
21480 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21481 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21482 || flag_stack_clash_protection)
21483 && !df_regs_ever_live_p (LR_REGNUM)
21484 && arm_r3_live_at_start_p ()));
21485
21486 /* Find somewhere to store IP whilst the frame is being created.
21487 We try the following places in order:
21488
21489 1. The last argument register r3 if it is available.
21490 2. A slot on the stack above the frame if there are no
21491 arguments to push onto the stack.
21492 3. Register r3 again, after pushing the argument registers
21493 onto the stack, if this is a varargs function.
21494 4. The last slot on the stack created for the arguments to
21495 push, if this isn't a varargs function.
21496
21497 Note - we only need to tell the dwarf2 backend about the SP
21498 adjustment in the second variant; the static chain register
21499 doesn't need to be unwound, as it doesn't contain a value
21500 inherited from the caller. */
21501 if (clobber_ip)
21502 {
21503 if (!arm_r3_live_at_start_p ())
21504 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21505 else if (args_to_push == 0)
21506 {
21507 rtx addr, dwarf;
21508
21509 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21510 saved_regs += 4;
21511
21512 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21513 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21514 fp_offset = 4;
21515
21516 /* Just tell the dwarf backend that we adjusted SP. */
21517 dwarf = gen_rtx_SET (stack_pointer_rtx,
21518 plus_constant (Pmode, stack_pointer_rtx,
21519 -fp_offset));
21520 RTX_FRAME_RELATED_P (insn) = 1;
21521 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21522 }
21523 else
21524 {
21525 /* Store the args on the stack. */
21526 if (cfun->machine->uses_anonymous_args)
21527 {
21528 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21529 (0xf0 >> (args_to_push / 4)) & 0xf);
21530 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21531 saved_pretend_args = 1;
21532 }
21533 else
21534 {
21535 rtx addr, dwarf;
21536
21537 if (args_to_push == 4)
21538 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21539 else
21540 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21541 plus_constant (Pmode,
21542 stack_pointer_rtx,
21543 -args_to_push));
21544
21545 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21546
21547 /* Just tell the dwarf backend that we adjusted SP. */
21548 dwarf = gen_rtx_SET (stack_pointer_rtx,
21549 plus_constant (Pmode, stack_pointer_rtx,
21550 -args_to_push));
21551 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21552 }
21553
21554 RTX_FRAME_RELATED_P (insn) = 1;
21555 fp_offset = args_to_push;
21556 args_to_push = 0;
21557 }
21558 }
21559
21560 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21561 {
21562 if (IS_INTERRUPT (func_type))
21563 {
21564 /* Interrupt functions must not corrupt any registers.
21565 Creating a frame pointer however, corrupts the IP
21566 register, so we must push it first. */
21567 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21568
21569 /* Do not set RTX_FRAME_RELATED_P on this insn.
21570 The dwarf stack unwinding code only wants to see one
21571 stack decrement per function, and this is not it. If
21572 this instruction is labeled as being part of the frame
21573 creation sequence then dwarf2out_frame_debug_expr will
21574 die when it encounters the assignment of IP to FP
21575 later on, since the use of SP here establishes SP as
21576 the CFA register and not IP.
21577
21578 Anyway this instruction is not really part of the stack
21579 frame creation although it is part of the prologue. */
21580 }
21581
21582 insn = emit_set_insn (ip_rtx,
21583 plus_constant (Pmode, stack_pointer_rtx,
21584 fp_offset));
21585 RTX_FRAME_RELATED_P (insn) = 1;
21586 }
21587
21588 if (args_to_push)
21589 {
21590 /* Push the argument registers, or reserve space for them. */
21591 if (cfun->machine->uses_anonymous_args)
21592 insn = emit_multi_reg_push
21593 ((0xf0 >> (args_to_push / 4)) & 0xf,
21594 (0xf0 >> (args_to_push / 4)) & 0xf);
21595 else
21596 insn = emit_insn
21597 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21598 GEN_INT (- args_to_push)));
21599 RTX_FRAME_RELATED_P (insn) = 1;
21600 }
21601
21602 /* If this is an interrupt service routine, and the link register
21603 is going to be pushed, and we're not generating an extra
21604 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21605 subtracting four from LR now will mean that the function return
21606 can be done with a single instruction. */
21607 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21608 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21609 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21610 && TARGET_ARM)
21611 {
21612 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21613
21614 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21615 }
21616
21617 if (live_regs_mask)
21618 {
21619 unsigned long dwarf_regs_mask = live_regs_mask;
21620
21621 saved_regs += bit_count (live_regs_mask) * 4;
21622 if (optimize_size && !frame_pointer_needed
21623 && saved_regs == offsets->saved_regs - offsets->saved_args)
21624 {
21625 /* If no coprocessor registers are being pushed and we don't have
21626 to worry about a frame pointer then push extra registers to
21627 create the stack frame. This is done in a way that does not
21628 alter the frame layout, so is independent of the epilogue. */
21629 int n;
21630 int frame;
21631 n = 0;
21632 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21633 n++;
21634 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21635 if (frame && n * 4 >= frame)
21636 {
21637 n = frame / 4;
21638 live_regs_mask |= (1 << n) - 1;
21639 saved_regs += frame;
21640 }
21641 }
21642
21643 if (TARGET_LDRD
21644 && current_tune->prefer_ldrd_strd
21645 && !optimize_function_for_size_p (cfun))
21646 {
21647 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21648 if (TARGET_THUMB2)
21649 thumb2_emit_strd_push (live_regs_mask);
21650 else if (TARGET_ARM
21651 && !TARGET_APCS_FRAME
21652 && !IS_INTERRUPT (func_type))
21653 arm_emit_strd_push (live_regs_mask);
21654 else
21655 {
21656 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21657 RTX_FRAME_RELATED_P (insn) = 1;
21658 }
21659 }
21660 else
21661 {
21662 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21663 RTX_FRAME_RELATED_P (insn) = 1;
21664 }
21665 }
21666
21667 if (! IS_VOLATILE (func_type))
21668 saved_regs += arm_save_coproc_regs ();
21669
21670 if (frame_pointer_needed && TARGET_ARM)
21671 {
21672 /* Create the new frame pointer. */
21673 if (TARGET_APCS_FRAME)
21674 {
21675 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21676 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21677 RTX_FRAME_RELATED_P (insn) = 1;
21678 }
21679 else
21680 {
21681 insn = GEN_INT (saved_regs - (4 + fp_offset));
21682 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21683 stack_pointer_rtx, insn));
21684 RTX_FRAME_RELATED_P (insn) = 1;
21685 }
21686 }
21687
21688 size = offsets->outgoing_args - offsets->saved_args;
21689 if (flag_stack_usage_info)
21690 current_function_static_stack_size = size;
21691
21692 /* If this isn't an interrupt service routine and we have a frame, then do
21693 stack checking. We use IP as the first scratch register, except for the
21694 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21695 if (!IS_INTERRUPT (func_type)
21696 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21697 || flag_stack_clash_protection))
21698 {
21699 unsigned int regno;
21700
21701 if (!IS_NESTED (func_type) || clobber_ip)
21702 regno = IP_REGNUM;
21703 else if (df_regs_ever_live_p (LR_REGNUM))
21704 regno = LR_REGNUM;
21705 else
21706 regno = 3;
21707
21708 if (crtl->is_leaf && !cfun->calls_alloca)
21709 {
21710 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21711 arm_emit_probe_stack_range (get_stack_check_protect (),
21712 size - get_stack_check_protect (),
21713 regno, live_regs_mask);
21714 }
21715 else if (size > 0)
21716 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21717 regno, live_regs_mask);
21718 }
21719
21720 /* Recover the static chain register. */
21721 if (clobber_ip)
21722 {
21723 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21724 insn = gen_rtx_REG (SImode, 3);
21725 else
21726 {
21727 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21728 insn = gen_frame_mem (SImode, insn);
21729 }
21730 emit_set_insn (ip_rtx, insn);
21731 emit_insn (gen_force_register_use (ip_rtx));
21732 }
21733
21734 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21735 {
21736 /* This add can produce multiple insns for a large constant, so we
21737 need to get tricky. */
21738 rtx_insn *last = get_last_insn ();
21739
21740 amount = GEN_INT (offsets->saved_args + saved_regs
21741 - offsets->outgoing_args);
21742
21743 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21744 amount));
21745 do
21746 {
21747 last = last ? NEXT_INSN (last) : get_insns ();
21748 RTX_FRAME_RELATED_P (last) = 1;
21749 }
21750 while (last != insn);
21751
21752 /* If the frame pointer is needed, emit a special barrier that
21753 will prevent the scheduler from moving stores to the frame
21754 before the stack adjustment. */
21755 if (frame_pointer_needed)
21756 emit_insn (gen_stack_tie (stack_pointer_rtx,
21757 hard_frame_pointer_rtx));
21758 }
21759
21760
21761 if (frame_pointer_needed && TARGET_THUMB2)
21762 thumb_set_frame_pointer (offsets);
21763
21764 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21765 {
21766 unsigned long mask;
21767
21768 mask = live_regs_mask;
21769 mask &= THUMB2_WORK_REGS;
21770 if (!IS_NESTED (func_type))
21771 mask |= (1 << IP_REGNUM);
21772 arm_load_pic_register (mask);
21773 }
21774
21775 /* If we are profiling, make sure no instructions are scheduled before
21776 the call to mcount. Similarly if the user has requested no
21777 scheduling in the prolog. Similarly if we want non-call exceptions
21778 using the EABI unwinder, to prevent faulting instructions from being
21779 swapped with a stack adjustment. */
21780 if (crtl->profile || !TARGET_SCHED_PROLOG
21781 || (arm_except_unwind_info (&global_options) == UI_TARGET
21782 && cfun->can_throw_non_call_exceptions))
21783 emit_insn (gen_blockage ());
21784
21785 /* If the link register is being kept alive, with the return address in it,
21786 then make sure that it does not get reused by the ce2 pass. */
21787 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21788 cfun->machine->lr_save_eliminated = 1;
21789 }
21790 \f
21791 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21792 static void
21793 arm_print_condition (FILE *stream)
21794 {
21795 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21796 {
21797 /* Branch conversion is not implemented for Thumb-2. */
21798 if (TARGET_THUMB)
21799 {
21800 output_operand_lossage ("predicated Thumb instruction");
21801 return;
21802 }
21803 if (current_insn_predicate != NULL)
21804 {
21805 output_operand_lossage
21806 ("predicated instruction in conditional sequence");
21807 return;
21808 }
21809
21810 fputs (arm_condition_codes[arm_current_cc], stream);
21811 }
21812 else if (current_insn_predicate)
21813 {
21814 enum arm_cond_code code;
21815
21816 if (TARGET_THUMB1)
21817 {
21818 output_operand_lossage ("predicated Thumb instruction");
21819 return;
21820 }
21821
21822 code = get_arm_condition_code (current_insn_predicate);
21823 fputs (arm_condition_codes[code], stream);
21824 }
21825 }
21826
21827
21828 /* Globally reserved letters: acln
21829 Punctuation letters currently used: @_|?().!#
21830 Lower case letters currently used: bcdefhimpqtvwxyz
21831 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21832 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21833
21834 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21835
21836 If CODE is 'd', then the X is a condition operand and the instruction
21837 should only be executed if the condition is true.
21838 if CODE is 'D', then the X is a condition operand and the instruction
21839 should only be executed if the condition is false: however, if the mode
21840 of the comparison is CCFPEmode, then always execute the instruction -- we
21841 do this because in these circumstances !GE does not necessarily imply LT;
21842 in these cases the instruction pattern will take care to make sure that
21843 an instruction containing %d will follow, thereby undoing the effects of
21844 doing this instruction unconditionally.
21845 If CODE is 'N' then X is a floating point operand that must be negated
21846 before output.
21847 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21848 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
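/* Illustrative only (the exact templates live in the machine
   description): an output template along the lines of
       "add%?\t%0, %1, %2"
   uses the '?' punctuation code handled below to emit the current
   condition suffix when the insn is conditionally executed, e.g.
   producing "addne r0, r1, r2" inside a conditional sequence.  */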
21849 static void
21850 arm_print_operand (FILE *stream, rtx x, int code)
21851 {
21852 switch (code)
21853 {
21854 case '@':
21855 fputs (ASM_COMMENT_START, stream);
21856 return;
21857
21858 case '_':
21859 fputs (user_label_prefix, stream);
21860 return;
21861
21862 case '|':
21863 fputs (REGISTER_PREFIX, stream);
21864 return;
21865
21866 case '?':
21867 arm_print_condition (stream);
21868 return;
21869
21870 case '.':
21871 /* The current condition code for a condition code setting instruction.
21872 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21873 fputc('s', stream);
21874 arm_print_condition (stream);
21875 return;
21876
21877 case '!':
21878 /* If the instruction is conditionally executed then print
21879 the current condition code, otherwise print 's'. */
21880 gcc_assert (TARGET_THUMB2);
21881 if (current_insn_predicate)
21882 arm_print_condition (stream);
21883 else
21884 fputc('s', stream);
21885 break;
21886
21887 /* %# is a "break" sequence. It doesn't output anything, but is used to
21888 separate e.g. operand numbers from following text, if that text consists
21889 of further digits which we don't want to be part of the operand
21890 number. */
21891 case '#':
21892 return;
21893
21894 case 'N':
21895 {
21896 REAL_VALUE_TYPE r;
21897 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21898 fprintf (stream, "%s", fp_const_from_val (&r));
21899 }
21900 return;
21901
21902 /* An integer or symbol address without a preceding # sign. */
21903 case 'c':
21904 switch (GET_CODE (x))
21905 {
21906 case CONST_INT:
21907 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21908 break;
21909
21910 case SYMBOL_REF:
21911 output_addr_const (stream, x);
21912 break;
21913
21914 case CONST:
21915 if (GET_CODE (XEXP (x, 0)) == PLUS
21916 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21917 {
21918 output_addr_const (stream, x);
21919 break;
21920 }
21921 /* Fall through. */
21922
21923 default:
21924 output_operand_lossage ("Unsupported operand for code '%c'", code);
21925 }
21926 return;
21927
21928 /* An integer that we want to print in HEX. */
21929 case 'x':
21930 switch (GET_CODE (x))
21931 {
21932 case CONST_INT:
21933 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21934 break;
21935
21936 default:
21937 output_operand_lossage ("Unsupported operand for code '%c'", code);
21938 }
21939 return;
21940
21941 case 'B':
21942 if (CONST_INT_P (x))
21943 {
21944 HOST_WIDE_INT val;
21945 val = ARM_SIGN_EXTEND (~INTVAL (x));
21946 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21947 }
21948 else
21949 {
21950 putc ('~', stream);
21951 output_addr_const (stream, x);
21952 }
21953 return;
21954
21955 case 'b':
21956 /* Print the log2 of a CONST_INT. */
21957 {
21958 HOST_WIDE_INT val;
21959
21960 if (!CONST_INT_P (x)
21961 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21962 output_operand_lossage ("Unsupported operand for code '%c'", code);
21963 else
21964 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21965 }
21966 return;
21967
21968 case 'L':
21969 /* The low 16 bits of an immediate constant. */
21970 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21971 return;
21972
21973 case 'i':
21974 fprintf (stream, "%s", arithmetic_instr (x, 1));
21975 return;
21976
21977 case 'I':
21978 fprintf (stream, "%s", arithmetic_instr (x, 0));
21979 return;
21980
21981 case 'S':
21982 {
21983 HOST_WIDE_INT val;
21984 const char *shift;
21985
21986 shift = shift_op (x, &val);
21987
21988 if (shift)
21989 {
21990 fprintf (stream, ", %s ", shift);
21991 if (val == -1)
21992 arm_print_operand (stream, XEXP (x, 1), 0);
21993 else
21994 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21995 }
21996 }
21997 return;
21998
21999 /* An explanation of the 'Q', 'R' and 'H' register operands:
22000
22001 In a pair of registers containing a DI or DF value the 'Q'
22002 operand returns the register number of the register containing
22003 the least significant part of the value. The 'R' operand returns
22004 the register number of the register containing the most
22005 significant part of the value.
22006
22007 The 'H' operand returns the higher of the two register numbers.
22008 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22009 same as the 'Q' operand, since the most significant part of the
22010 value is held in the lower number register. The reverse is true
22011 on systems where WORDS_BIG_ENDIAN is false.
22012
22013 The purpose of these operands is to distinguish between cases
22014 where the endian-ness of the values is important (for example
22015 when they are added together), and cases where the endian-ness
22016 is irrelevant, but the order of register operations is important.
22017 For example when loading a value from memory into a register
22018 pair, the endian-ness does not matter. Provided that the value
22019 from the lower memory address is put into the lower numbered
22020 register, and the value from the higher address is put into the
22021 higher numbered register, the load will work regardless of whether
22022 the value being loaded is big-wordian or little-wordian. The
22023 order of the two register loads can matter however, if the address
22024 of the memory location is actually held in one of the registers
22025 being overwritten by the load.
22026
22027 The 'Q' and 'R' constraints are also available for 64-bit
22028 constants. */
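/* For example, on a little-endian target (WORDS_BIG_ENDIAN false)
   with a DImode value in the register pair r0/r1: '%Q' prints "r0"
   (least significant half), '%R' prints "r1" (most significant half)
   and '%H' prints "r1" (the higher-numbered register).  When
   WORDS_BIG_ENDIAN is true, '%Q' and '%H' both print "r1" and '%R'
   prints "r0".  */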
22029 case 'Q':
22030 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22031 {
22032 rtx part = gen_lowpart (SImode, x);
22033 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22034 return;
22035 }
22036
22037 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22038 {
22039 output_operand_lossage ("invalid operand for code '%c'", code);
22040 return;
22041 }
22042
22043 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22044 return;
22045
22046 case 'R':
22047 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22048 {
22049 machine_mode mode = GET_MODE (x);
22050 rtx part;
22051
22052 if (mode == VOIDmode)
22053 mode = DImode;
22054 part = gen_highpart_mode (SImode, mode, x);
22055 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22056 return;
22057 }
22058
22059 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22066 return;
22067
22068 case 'H':
22069 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22070 {
22071 output_operand_lossage ("invalid operand for code '%c'", code);
22072 return;
22073 }
22074
22075 asm_fprintf (stream, "%r", REGNO (x) + 1);
22076 return;
22077
22078 case 'J':
22079 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22080 {
22081 output_operand_lossage ("invalid operand for code '%c'", code);
22082 return;
22083 }
22084
22085 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22086 return;
22087
22088 case 'K':
22089 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22090 {
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22093 }
22094
22095 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22096 return;
22097
22098 case 'm':
22099 asm_fprintf (stream, "%r",
22100 REG_P (XEXP (x, 0))
22101 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22102 return;
22103
22104 case 'M':
22105 asm_fprintf (stream, "{%r-%r}",
22106 REGNO (x),
22107 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22108 return;
22109
22110 /* Like 'M', but writing doubleword vector registers, for use by Neon
22111 insns. */
22112 case 'h':
22113 {
22114 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22115 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22116 if (numregs == 1)
22117 asm_fprintf (stream, "{d%d}", regno);
22118 else
22119 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22120 }
22121 return;
22122
22123 case 'd':
22124 /* CONST_TRUE_RTX means always -- that's the default. */
22125 if (x == const_true_rtx)
22126 return;
22127
22128 if (!COMPARISON_P (x))
22129 {
22130 output_operand_lossage ("invalid operand for code '%c'", code);
22131 return;
22132 }
22133
22134 fputs (arm_condition_codes[get_arm_condition_code (x)],
22135 stream);
22136 return;
22137
22138 case 'D':
22139 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22140 want to do that. */
22141 if (x == const_true_rtx)
22142 {
22143 output_operand_lossage ("instruction never executed");
22144 return;
22145 }
22146 if (!COMPARISON_P (x))
22147 {
22148 output_operand_lossage ("invalid operand for code '%c'", code);
22149 return;
22150 }
22151
22152 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22153 (get_arm_condition_code (x))],
22154 stream);
22155 return;
22156
22157 case 's':
22158 case 'V':
22159 case 'W':
22160 case 'X':
22161 case 'Y':
22162 case 'Z':
22163 /* Former Maverick support, removed after GCC-4.7. */
22164 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22165 return;
22166
22167 case 'U':
22168 if (!REG_P (x)
22169 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22170 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22171 /* Bad value for wCG register number. */
22172 {
22173 output_operand_lossage ("invalid operand for code '%c'", code);
22174 return;
22175 }
22176
22177 else
22178 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22179 return;
22180
22181 /* Print an iWMMXt control register name. */
22182 case 'w':
22183 if (!CONST_INT_P (x)
22184 || INTVAL (x) < 0
22185 || INTVAL (x) >= 16)
22186 /* Bad value for wC register number. */
22187 {
22188 output_operand_lossage ("invalid operand for code '%c'", code);
22189 return;
22190 }
22191
22192 else
22193 {
22194 static const char * wc_reg_names [16] =
22195 {
22196 "wCID", "wCon", "wCSSF", "wCASF",
22197 "wC4", "wC5", "wC6", "wC7",
22198 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22199 "wC12", "wC13", "wC14", "wC15"
22200 };
22201
22202 fputs (wc_reg_names [INTVAL (x)], stream);
22203 }
22204 return;
22205
22206 /* Print the high single-precision register of a VFP double-precision
22207 register. */
22208 case 'p':
22209 {
22210 machine_mode mode = GET_MODE (x);
22211 int regno;
22212
22213 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22214 {
22215 output_operand_lossage ("invalid operand for code '%c'", code);
22216 return;
22217 }
22218
22219 regno = REGNO (x);
22220 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22221 {
22222 output_operand_lossage ("invalid operand for code '%c'", code);
22223 return;
22224 }
22225
22226 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22227 }
22228 return;
22229
22230 /* Print a VFP/Neon double precision or quad precision register name. */
22231 case 'P':
22232 case 'q':
22233 {
22234 machine_mode mode = GET_MODE (x);
22235 int is_quad = (code == 'q');
22236 int regno;
22237
22238 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22239 {
22240 output_operand_lossage ("invalid operand for code '%c'", code);
22241 return;
22242 }
22243
22244 if (!REG_P (x)
22245 || !IS_VFP_REGNUM (REGNO (x)))
22246 {
22247 output_operand_lossage ("invalid operand for code '%c'", code);
22248 return;
22249 }
22250
22251 regno = REGNO (x);
22252 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22253 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22254 {
22255 output_operand_lossage ("invalid operand for code '%c'", code);
22256 return;
22257 }
22258
22259 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22260 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22261 }
22262 return;
22263
22264 /* These two codes print the low/high doubleword register of a Neon quad
22265 register, respectively. For pair-structure types, can also print
22266 low/high quadword registers. */
22267 case 'e':
22268 case 'f':
22269 {
22270 machine_mode mode = GET_MODE (x);
22271 int regno;
22272
22273 if ((GET_MODE_SIZE (mode) != 16
22274 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22275 {
22276 output_operand_lossage ("invalid operand for code '%c'", code);
22277 return;
22278 }
22279
22280 regno = REGNO (x);
22281 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22282 {
22283 output_operand_lossage ("invalid operand for code '%c'", code);
22284 return;
22285 }
22286
22287 if (GET_MODE_SIZE (mode) == 16)
22288 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22289 + (code == 'f' ? 1 : 0));
22290 else
22291 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22292 + (code == 'f' ? 1 : 0));
22293 }
22294 return;
22295
22296 /* Print a VFPv3 floating-point constant, represented as an integer
22297 index. */
22298 case 'G':
22299 {
22300 int index = vfp3_const_double_index (x);
22301 gcc_assert (index != -1);
22302 fprintf (stream, "%d", index);
22303 }
22304 return;
22305
22306 /* Print bits representing opcode features for Neon.
22307
22308 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22309 and polynomials as unsigned.
22310
22311 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22312
22313 Bit 2 is 1 for rounding functions, 0 otherwise. */
22314
22315 /* Identify the type as 's', 'u', 'p' or 'f'. */
22316 case 'T':
22317 {
22318 HOST_WIDE_INT bits = INTVAL (x);
22319 fputc ("uspf"[bits & 3], stream);
22320 }
22321 return;
22322
22323 /* Likewise, but signed and unsigned integers are both 'i'. */
22324 case 'F':
22325 {
22326 HOST_WIDE_INT bits = INTVAL (x);
22327 fputc ("iipf"[bits & 3], stream);
22328 }
22329 return;
22330
22331 /* As for 'T', but emit 'u' instead of 'p'. */
22332 case 't':
22333 {
22334 HOST_WIDE_INT bits = INTVAL (x);
22335 fputc ("usuf"[bits & 3], stream);
22336 }
22337 return;
22338
22339 /* Bit 2: rounding (vs none). */
22340 case 'O':
22341 {
22342 HOST_WIDE_INT bits = INTVAL (x);
22343 fputs ((bits & 4) != 0 ? "r" : "", stream);
22344 }
22345 return;
22346
22347 /* Memory operand for vld1/vst1 instruction. */
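/* A worked example (illustrative): a 16-byte access through r0 that
   is known to be 128-bit aligned and uses post-increment addressing
   is printed as "[r0:128]!"; without a usable alignment guarantee it
   would be just "[r0]!".  */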
22348 case 'A':
22349 {
22350 rtx addr;
22351 bool postinc = FALSE;
22352 rtx postinc_reg = NULL;
22353 unsigned align, memsize, align_bits;
22354
22355 gcc_assert (MEM_P (x));
22356 addr = XEXP (x, 0);
22357 if (GET_CODE (addr) == POST_INC)
22358 {
22359 postinc = 1;
22360 addr = XEXP (addr, 0);
22361 }
22362 if (GET_CODE (addr) == POST_MODIFY)
22363 {
22364 postinc_reg = XEXP (XEXP (addr, 1), 1);
22365 addr = XEXP (addr, 0);
22366 }
22367 asm_fprintf (stream, "[%r", REGNO (addr));
22368
22369 /* We know the alignment of this access, so we can emit a hint in the
22370 instruction (for some alignments) as an aid to the memory subsystem
22371 of the target. */
22372 align = MEM_ALIGN (x) >> 3;
22373 memsize = MEM_SIZE (x);
22374
22375 /* Only certain alignment specifiers are supported by the hardware. */
22376 if (memsize == 32 && (align % 32) == 0)
22377 align_bits = 256;
22378 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22379 align_bits = 128;
22380 else if (memsize >= 8 && (align % 8) == 0)
22381 align_bits = 64;
22382 else
22383 align_bits = 0;
22384
22385 if (align_bits != 0)
22386 asm_fprintf (stream, ":%d", align_bits);
22387
22388 asm_fprintf (stream, "]");
22389
22390 if (postinc)
22391 fputs("!", stream);
22392 if (postinc_reg)
22393 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22394 }
22395 return;
22396
22397 case 'C':
22398 {
22399 rtx addr;
22400
22401 gcc_assert (MEM_P (x));
22402 addr = XEXP (x, 0);
22403 gcc_assert (REG_P (addr));
22404 asm_fprintf (stream, "[%r]", REGNO (addr));
22405 }
22406 return;
22407
22408 /* Translate an S register number into a D register number and element index. */
22409 case 'y':
22410 {
22411 machine_mode mode = GET_MODE (x);
22412 int regno;
22413
22414 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22415 {
22416 output_operand_lossage ("invalid operand for code '%c'", code);
22417 return;
22418 }
22419
22420 regno = REGNO (x);
22421 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22422 {
22423 output_operand_lossage ("invalid operand for code '%c'", code);
22424 return;
22425 }
22426
22427 regno = regno - FIRST_VFP_REGNUM;
22428 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22429 }
22430 return;
22431
22432 case 'v':
22433 gcc_assert (CONST_DOUBLE_P (x));
22434 int result;
22435 result = vfp3_const_double_for_fract_bits (x);
22436 if (result == 0)
22437 result = vfp3_const_double_for_bits (x);
22438 fprintf (stream, "#%d", result);
22439 return;
22440
22441 /* Register specifier for vld1.16/vst1.16. Translate the S register
22442 number into a D register number and element index. */
22443 case 'z':
22444 {
22445 machine_mode mode = GET_MODE (x);
22446 int regno;
22447
22448 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22449 {
22450 output_operand_lossage ("invalid operand for code '%c'", code);
22451 return;
22452 }
22453
22454 regno = REGNO (x);
22455 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22456 {
22457 output_operand_lossage ("invalid operand for code '%c'", code);
22458 return;
22459 }
22460
22461 regno = regno - FIRST_VFP_REGNUM;
22462 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22463 }
22464 return;
22465
22466 default:
22467 if (x == 0)
22468 {
22469 output_operand_lossage ("missing operand");
22470 return;
22471 }
22472
22473 switch (GET_CODE (x))
22474 {
22475 case REG:
22476 asm_fprintf (stream, "%r", REGNO (x));
22477 break;
22478
22479 case MEM:
22480 output_address (GET_MODE (x), XEXP (x, 0));
22481 break;
22482
22483 case CONST_DOUBLE:
22484 {
22485 char fpstr[20];
22486 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22487 sizeof (fpstr), 0, 1);
22488 fprintf (stream, "#%s", fpstr);
22489 }
22490 break;
22491
22492 default:
22493 gcc_assert (GET_CODE (x) != NEG);
22494 fputc ('#', stream);
22495 if (GET_CODE (x) == HIGH)
22496 {
22497 fputs (":lower16:", stream);
22498 x = XEXP (x, 0);
22499 }
22500
22501 output_addr_const (stream, x);
22502 break;
22503 }
22504 }
22505 }
22506 \f
22507 /* Target hook for printing a memory address. */
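/* Typical 32-bit outputs (illustrative): "[r0]", "[r0, #4]",
   "[r0, -r1]", "[r0, r1, lsl #2]", "[r0, #8]!" for pre-indexed and
   "[r0], #8" for post-indexed addressing.  */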
22508 static void
22509 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22510 {
22511 if (TARGET_32BIT)
22512 {
22513 int is_minus = GET_CODE (x) == MINUS;
22514
22515 if (REG_P (x))
22516 asm_fprintf (stream, "[%r]", REGNO (x));
22517 else if (GET_CODE (x) == PLUS || is_minus)
22518 {
22519 rtx base = XEXP (x, 0);
22520 rtx index = XEXP (x, 1);
22521 HOST_WIDE_INT offset = 0;
22522 if (!REG_P (base)
22523 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22524 {
22525 /* Ensure that BASE is a register
22526 (one of them must be).
22527 Also ensure that SP is not used as an index register. */
22528 std::swap (base, index);
22529 }
22530 switch (GET_CODE (index))
22531 {
22532 case CONST_INT:
22533 offset = INTVAL (index);
22534 if (is_minus)
22535 offset = -offset;
22536 asm_fprintf (stream, "[%r, #%wd]",
22537 REGNO (base), offset);
22538 break;
22539
22540 case REG:
22541 asm_fprintf (stream, "[%r, %s%r]",
22542 REGNO (base), is_minus ? "-" : "",
22543 REGNO (index));
22544 break;
22545
22546 case MULT:
22547 case ASHIFTRT:
22548 case LSHIFTRT:
22549 case ASHIFT:
22550 case ROTATERT:
22551 {
22552 asm_fprintf (stream, "[%r, %s%r",
22553 REGNO (base), is_minus ? "-" : "",
22554 REGNO (XEXP (index, 0)));
22555 arm_print_operand (stream, index, 'S');
22556 fputs ("]", stream);
22557 break;
22558 }
22559
22560 default:
22561 gcc_unreachable ();
22562 }
22563 }
22564 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22565 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22566 {
22567 gcc_assert (REG_P (XEXP (x, 0)));
22568
22569 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22570 asm_fprintf (stream, "[%r, #%s%d]!",
22571 REGNO (XEXP (x, 0)),
22572 GET_CODE (x) == PRE_DEC ? "-" : "",
22573 GET_MODE_SIZE (mode));
22574 else
22575 asm_fprintf (stream, "[%r], #%s%d",
22576 REGNO (XEXP (x, 0)),
22577 GET_CODE (x) == POST_DEC ? "-" : "",
22578 GET_MODE_SIZE (mode));
22579 }
22580 else if (GET_CODE (x) == PRE_MODIFY)
22581 {
22582 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22583 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22584 asm_fprintf (stream, "#%wd]!",
22585 INTVAL (XEXP (XEXP (x, 1), 1)));
22586 else
22587 asm_fprintf (stream, "%r]!",
22588 REGNO (XEXP (XEXP (x, 1), 1)));
22589 }
22590 else if (GET_CODE (x) == POST_MODIFY)
22591 {
22592 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22593 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22594 asm_fprintf (stream, "#%wd",
22595 INTVAL (XEXP (XEXP (x, 1), 1)));
22596 else
22597 asm_fprintf (stream, "%r",
22598 REGNO (XEXP (XEXP (x, 1), 1)));
22599 }
22600 else output_addr_const (stream, x);
22601 }
22602 else
22603 {
22604 if (REG_P (x))
22605 asm_fprintf (stream, "[%r]", REGNO (x));
22606 else if (GET_CODE (x) == POST_INC)
22607 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22608 else if (GET_CODE (x) == PLUS)
22609 {
22610 gcc_assert (REG_P (XEXP (x, 0)));
22611 if (CONST_INT_P (XEXP (x, 1)))
22612 asm_fprintf (stream, "[%r, #%wd]",
22613 REGNO (XEXP (x, 0)),
22614 INTVAL (XEXP (x, 1)));
22615 else
22616 asm_fprintf (stream, "[%r, %r]",
22617 REGNO (XEXP (x, 0)),
22618 REGNO (XEXP (x, 1)));
22619 }
22620 else
22621 output_addr_const (stream, x);
22622 }
22623 }
22624 \f
22625 /* Target hook for indicating whether a punctuation character for
22626 TARGET_PRINT_OPERAND is valid. */
22627 static bool
22628 arm_print_operand_punct_valid_p (unsigned char code)
22629 {
22630 return (code == '@' || code == '|' || code == '.'
22631 || code == '(' || code == ')' || code == '#'
22632 || (TARGET_32BIT && (code == '?'))
22633 || (TARGET_THUMB2 && (code == '!'))
22634 || (TARGET_THUMB && (code == '_')));
22635 }
22636 \f
22637 /* Target hook for assembling integer objects. The ARM version needs to
22638 handle word-sized values specially. */
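/* For example, a PIC constant-pool reference to a preemptible global
   symbol "foo" is emitted as ".word foo(GOT)", while a locally
   resolvable reference gets "(GOTOFF)" instead (assuming
   NEED_GOT_RELOC).  */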
22639 static bool
22640 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22641 {
22642 machine_mode mode;
22643
22644 if (size == UNITS_PER_WORD && aligned_p)
22645 {
22646 fputs ("\t.word\t", asm_out_file);
22647 output_addr_const (asm_out_file, x);
22648
22649 /* Mark symbols as position independent. We only do this in the
22650 .text segment, not in the .data segment. */
22651 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22652 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22653 {
22654 /* See legitimize_pic_address for an explanation of the
22655 TARGET_VXWORKS_RTP check. */
22656 /* References to weak symbols cannot be resolved locally:
22657 they may be overridden by a non-weak definition at link
22658 time. */
22659 if (!arm_pic_data_is_text_relative
22660 || (GET_CODE (x) == SYMBOL_REF
22661 && (!SYMBOL_REF_LOCAL_P (x)
22662 || (SYMBOL_REF_DECL (x)
22663 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22664 fputs ("(GOT)", asm_out_file);
22665 else
22666 fputs ("(GOTOFF)", asm_out_file);
22667 }
22668 fputc ('\n', asm_out_file);
22669 return true;
22670 }
22671
22672 mode = GET_MODE (x);
22673
22674 if (arm_vector_mode_supported_p (mode))
22675 {
22676 int i, units;
22677
22678 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22679
22680 units = CONST_VECTOR_NUNITS (x);
22681 size = GET_MODE_UNIT_SIZE (mode);
22682
22683 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22684 for (i = 0; i < units; i++)
22685 {
22686 rtx elt = CONST_VECTOR_ELT (x, i);
22687 assemble_integer
22688 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22689 }
22690 else
22691 for (i = 0; i < units; i++)
22692 {
22693 rtx elt = CONST_VECTOR_ELT (x, i);
22694 assemble_real
22695 (*CONST_DOUBLE_REAL_VALUE (elt),
22696 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22697 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22698 }
22699
22700 return true;
22701 }
22702
22703 return default_assemble_integer (x, size, aligned_p);
22704 }
22705
22706 static void
22707 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22708 {
22709 section *s;
22710
22711 if (!TARGET_AAPCS_BASED)
22712 {
22713 (is_ctor ?
22714 default_named_section_asm_out_constructor
22715 : default_named_section_asm_out_destructor) (symbol, priority);
22716 return;
22717 }
22718
22719 /* Put these in the .init_array section, using a special relocation. */
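/* For example, a constructor with priority 65 goes into the section
   ".init_array.00065" (the "%.5u" below zero-pads the priority) and
   its entry is emitted further down as ".word symbol(target1)".  */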
22720 if (priority != DEFAULT_INIT_PRIORITY)
22721 {
22722 char buf[18];
22723 sprintf (buf, "%s.%.5u",
22724 is_ctor ? ".init_array" : ".fini_array",
22725 priority);
22726 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22727 }
22728 else if (is_ctor)
22729 s = ctors_section;
22730 else
22731 s = dtors_section;
22732
22733 switch_to_section (s);
22734 assemble_align (POINTER_SIZE);
22735 fputs ("\t.word\t", asm_out_file);
22736 output_addr_const (asm_out_file, symbol);
22737 fputs ("(target1)\n", asm_out_file);
22738 }
22739
22740 /* Add a function to the list of static constructors. */
22741
22742 static void
22743 arm_elf_asm_constructor (rtx symbol, int priority)
22744 {
22745 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22746 }
22747
22748 /* Add a function to the list of static destructors. */
22749
22750 static void
22751 arm_elf_asm_destructor (rtx symbol, int priority)
22752 {
22753 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22754 }
22755 \f
22756 /* A finite state machine takes care of noticing whether or not instructions
22757 can be conditionally executed, and thus decreases execution time and code
22758 size by deleting branch instructions. The fsm is controlled by
22759 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22760
22761 /* The states of the fsm controlling condition codes are:
22762 0: normal, do nothing special
22763 1: make ASM_OUTPUT_OPCODE not output this instruction
22764 2: make ASM_OUTPUT_OPCODE not output this instruction
22765 3: make instructions conditional
22766 4: make instructions conditional
22767
22768 State transitions (state->state by whom under condition):
22769 0 -> 1 final_prescan_insn if the `target' is a label
22770 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22771 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22772 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22773 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22774 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22775 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22776 (the target insn is arm_target_insn).
22777
22778 If the jump clobbers the conditions then we use states 2 and 4.
22779
22780 A similar thing can be done with conditional return insns.
22781
22782 XXX In case the `target' is an unconditional branch, this conditionalising
22783 of the instructions always reduces code size, but not always execution
22784 time. But then, I want to reduce the code size to somewhere near what
22785 /bin/cc produces. */
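/* A sketch of the transformation (illustrative ARM assembly):

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   is emitted instead as

       cmp     r0, #0
       addne   r1, r1, #1

   i.e. the branch and its label disappear and the skipped
   instruction is predicated on the inverse of the branch
   condition.  */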
22786
22787 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22788 instructions. When a COND_EXEC instruction is seen the subsequent
22789 instructions are scanned so that multiple conditional instructions can be
22790 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22791 specify the length and true/false mask for the IT block. These will be
22792 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22793
22794 /* Returns the index of the ARM condition code string in
22795 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22796 COMPARISON should be an rtx like `(eq (...) (...))'. */
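/* For instance, (eq (reg:CC_Z cc) (const_int 0)) maps to ARM_EQ,
   while under CC_SWPmode (the operands were swapped when the flags
   were set) a GT comparison maps to ARM_LT.  */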
22797
22798 enum arm_cond_code
22799 maybe_get_arm_condition_code (rtx comparison)
22800 {
22801 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22802 enum arm_cond_code code;
22803 enum rtx_code comp_code = GET_CODE (comparison);
22804
22805 if (GET_MODE_CLASS (mode) != MODE_CC)
22806 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22807 XEXP (comparison, 1));
22808
22809 switch (mode)
22810 {
22811 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22812 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22813 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22814 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22815 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22816 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22817 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22818 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22819 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22820 case E_CC_DLTUmode: code = ARM_CC;
22821
22822 dominance:
22823 if (comp_code == EQ)
22824 return ARM_INVERSE_CONDITION_CODE (code);
22825 if (comp_code == NE)
22826 return code;
22827 return ARM_NV;
22828
22829 case E_CC_NOOVmode:
22830 switch (comp_code)
22831 {
22832 case NE: return ARM_NE;
22833 case EQ: return ARM_EQ;
22834 case GE: return ARM_PL;
22835 case LT: return ARM_MI;
22836 default: return ARM_NV;
22837 }
22838
22839 case E_CC_Zmode:
22840 switch (comp_code)
22841 {
22842 case NE: return ARM_NE;
22843 case EQ: return ARM_EQ;
22844 default: return ARM_NV;
22845 }
22846
22847 case E_CC_Nmode:
22848 switch (comp_code)
22849 {
22850 case NE: return ARM_MI;
22851 case EQ: return ARM_PL;
22852 default: return ARM_NV;
22853 }
22854
22855 case E_CCFPEmode:
22856 case E_CCFPmode:
22857 /* We can handle all cases except UNEQ and LTGT. */
22858 switch (comp_code)
22859 {
22860 case GE: return ARM_GE;
22861 case GT: return ARM_GT;
22862 case LE: return ARM_LS;
22863 case LT: return ARM_MI;
22864 case NE: return ARM_NE;
22865 case EQ: return ARM_EQ;
22866 case ORDERED: return ARM_VC;
22867 case UNORDERED: return ARM_VS;
22868 case UNLT: return ARM_LT;
22869 case UNLE: return ARM_LE;
22870 case UNGT: return ARM_HI;
22871 case UNGE: return ARM_PL;
22872 /* UNEQ and LTGT do not have a representation. */
22873 case UNEQ: /* Fall through. */
22874 case LTGT: /* Fall through. */
22875 default: return ARM_NV;
22876 }
22877
22878 case E_CC_SWPmode:
22879 switch (comp_code)
22880 {
22881 case NE: return ARM_NE;
22882 case EQ: return ARM_EQ;
22883 case GE: return ARM_LE;
22884 case GT: return ARM_LT;
22885 case LE: return ARM_GE;
22886 case LT: return ARM_GT;
22887 case GEU: return ARM_LS;
22888 case GTU: return ARM_CC;
22889 case LEU: return ARM_CS;
22890 case LTU: return ARM_HI;
22891 default: return ARM_NV;
22892 }
22893
22894 case E_CC_Cmode:
22895 switch (comp_code)
22896 {
22897 case LTU: return ARM_CS;
22898 case GEU: return ARM_CC;
22899 case NE: return ARM_CS;
22900 case EQ: return ARM_CC;
22901 default: return ARM_NV;
22902 }
22903
22904 case E_CC_CZmode:
22905 switch (comp_code)
22906 {
22907 case NE: return ARM_NE;
22908 case EQ: return ARM_EQ;
22909 case GEU: return ARM_CS;
22910 case GTU: return ARM_HI;
22911 case LEU: return ARM_LS;
22912 case LTU: return ARM_CC;
22913 default: return ARM_NV;
22914 }
22915
22916 case E_CC_NCVmode:
22917 switch (comp_code)
22918 {
22919 case GE: return ARM_GE;
22920 case LT: return ARM_LT;
22921 case GEU: return ARM_CS;
22922 case LTU: return ARM_CC;
22923 default: return ARM_NV;
22924 }
22925
22926 case E_CC_Vmode:
22927 switch (comp_code)
22928 {
22929 case NE: return ARM_VS;
22930 case EQ: return ARM_VC;
22931 default: return ARM_NV;
22932 }
22933
22934 case E_CCmode:
22935 switch (comp_code)
22936 {
22937 case NE: return ARM_NE;
22938 case EQ: return ARM_EQ;
22939 case GE: return ARM_GE;
22940 case GT: return ARM_GT;
22941 case LE: return ARM_LE;
22942 case LT: return ARM_LT;
22943 case GEU: return ARM_CS;
22944 case GTU: return ARM_HI;
22945 case LEU: return ARM_LS;
22946 case LTU: return ARM_CC;
22947 default: return ARM_NV;
22948 }
22949
22950 default: gcc_unreachable ();
22951 }
22952 }
22953
22954 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22955 static enum arm_cond_code
22956 get_arm_condition_code (rtx comparison)
22957 {
22958 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22959 gcc_assert (code != ARM_NV);
22960 return code;
22961 }
22962
22963 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22964 code registers when not targeting Thumb1. The VFP condition register
22965 only exists when generating hard-float code. */
22966 static bool
22967 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22968 {
22969 if (!TARGET_32BIT)
22970 return false;
22971
22972 *p1 = CC_REGNUM;
22973 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22974 return true;
22975 }
22976
22977 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22978 instructions. */
22979 void
22980 thumb2_final_prescan_insn (rtx_insn *insn)
22981 {
22982 rtx_insn *first_insn = insn;
22983 rtx body = PATTERN (insn);
22984 rtx predicate;
22985 enum arm_cond_code code;
22986 int n;
22987 int mask;
22988 int max;
22989
22990 /* max_insns_skipped in the tune was already taken into account in the
22991 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22992 just emit the IT blocks as we can. It does not make sense to split
22993 the IT blocks. */
22994 max = MAX_INSN_PER_IT_BLOCK;
22995
22996 /* Remove the previous insn from the count of insns to be output. */
22997 if (arm_condexec_count)
22998 arm_condexec_count--;
22999
23000 /* Nothing to do if we are already inside a conditional block. */
23001 if (arm_condexec_count)
23002 return;
23003
23004 if (GET_CODE (body) != COND_EXEC)
23005 return;
23006
23007 /* Conditional jumps are implemented directly. */
23008 if (JUMP_P (insn))
23009 return;
23010
23011 predicate = COND_EXEC_TEST (body);
23012 arm_current_cc = get_arm_condition_code (predicate);
23013
23014 n = get_attr_ce_count (insn);
23015 arm_condexec_count = 1;
23016 arm_condexec_mask = (1 << n) - 1;
23017 arm_condexec_masklen = n;
23018 /* See if subsequent instructions can be combined into the same block. */
23019 for (;;)
23020 {
23021 insn = next_nonnote_insn (insn);
23022
23023 /* Jumping into the middle of an IT block is illegal, so a label or
23024 barrier terminates the block. */
23025 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23026 break;
23027
23028 body = PATTERN (insn);
23029 /* USE and CLOBBER aren't really insns, so just skip them. */
23030 if (GET_CODE (body) == USE
23031 || GET_CODE (body) == CLOBBER)
23032 continue;
23033
23034 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23035 if (GET_CODE (body) != COND_EXEC)
23036 break;
23037 /* Maximum number of conditionally executed instructions in a block. */
23038 n = get_attr_ce_count (insn);
23039 if (arm_condexec_masklen + n > max)
23040 break;
23041
23042 predicate = COND_EXEC_TEST (body);
23043 code = get_arm_condition_code (predicate);
23044 mask = (1 << n) - 1;
23045 if (arm_current_cc == code)
23046 arm_condexec_mask |= (mask << arm_condexec_masklen);
23047 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23048 break;
23049
23050 arm_condexec_count++;
23051 arm_condexec_masklen += n;
23052
23053 /* A jump must be the last instruction in a conditional block. */
23054 if (JUMP_P (insn))
23055 break;
23056 }
23057 /* Restore recog_data (getting the attributes of other insns can
23058 destroy this array, but final.c assumes that it remains intact
23059 across this call). */
23060 extract_constrain_insn_cached (first_insn);
23061 }
23062
23063 void
23064 arm_final_prescan_insn (rtx_insn *insn)
23065 {
23066 /* BODY will hold the body of INSN. */
23067 rtx body = PATTERN (insn);
23068
23069 /* This will be 1 if trying to repeat the trick, and things need to be
23070 reversed if it appears to fail. */
23071 int reverse = 0;
23072
23073 /* If we start with a return insn, we only succeed if we find another one. */
23074 int seeking_return = 0;
23075 enum rtx_code return_code = UNKNOWN;
23076
23077 /* START_INSN will hold the insn from where we start looking. This is the
23078 first insn after the following code_label if REVERSE is true. */
23079 rtx_insn *start_insn = insn;
23080
23081 /* If in state 4, check if the target branch is reached, in order to
23082 change back to state 0. */
23083 if (arm_ccfsm_state == 4)
23084 {
23085 if (insn == arm_target_insn)
23086 {
23087 arm_target_insn = NULL;
23088 arm_ccfsm_state = 0;
23089 }
23090 return;
23091 }
23092
23093 /* If in state 3, it is possible to repeat the trick, if this insn is an
23094 unconditional branch to a label, and immediately following this branch
23095 is the previous target label which is only used once, and the label this
23096 branch jumps to is not too far off. */
23097 if (arm_ccfsm_state == 3)
23098 {
23099 if (simplejump_p (insn))
23100 {
23101 start_insn = next_nonnote_insn (start_insn);
23102 if (BARRIER_P (start_insn))
23103 {
23104 /* XXX Isn't this always a barrier? */
23105 start_insn = next_nonnote_insn (start_insn);
23106 }
23107 if (LABEL_P (start_insn)
23108 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23109 && LABEL_NUSES (start_insn) == 1)
23110 reverse = TRUE;
23111 else
23112 return;
23113 }
23114 else if (ANY_RETURN_P (body))
23115 {
23116 start_insn = next_nonnote_insn (start_insn);
23117 if (BARRIER_P (start_insn))
23118 start_insn = next_nonnote_insn (start_insn);
23119 if (LABEL_P (start_insn)
23120 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23121 && LABEL_NUSES (start_insn) == 1)
23122 {
23123 reverse = TRUE;
23124 seeking_return = 1;
23125 return_code = GET_CODE (body);
23126 }
23127 else
23128 return;
23129 }
23130 else
23131 return;
23132 }
23133
23134 gcc_assert (!arm_ccfsm_state || reverse);
23135 if (!JUMP_P (insn))
23136 return;
23137
23138 /* This jump might be paralleled with a clobber of the condition codes;
23139 the jump should always come first. */
23140 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23141 body = XVECEXP (body, 0, 0);
23142
23143 if (reverse
23144 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23145 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23146 {
23147 int insns_skipped;
23148 int fail = FALSE, succeed = FALSE;
23149 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23150 int then_not_else = TRUE;
23151 rtx_insn *this_insn = start_insn;
23152 rtx label = 0;
23153
23154 /* Register the insn jumped to. */
23155 if (reverse)
23156 {
23157 if (!seeking_return)
23158 label = XEXP (SET_SRC (body), 0);
23159 }
23160 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23161 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23162 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23163 {
23164 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23165 then_not_else = FALSE;
23166 }
23167 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23168 {
23169 seeking_return = 1;
23170 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23171 }
23172 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23173 {
23174 seeking_return = 1;
23175 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23176 then_not_else = FALSE;
23177 }
23178 else
23179 gcc_unreachable ();
23180
23181 /* See how many insns this branch skips, and what kind of insns. If all
23182 insns are okay, and the label or unconditional branch to the same
23183 label is not too far away, succeed. */
23184 for (insns_skipped = 0;
23185 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23186 {
23187 rtx scanbody;
23188
23189 this_insn = next_nonnote_insn (this_insn);
23190 if (!this_insn)
23191 break;
23192
23193 switch (GET_CODE (this_insn))
23194 {
23195 case CODE_LABEL:
23196 /* Succeed if it is the target label, otherwise fail since
23197 control falls in from somewhere else. */
23198 if (this_insn == label)
23199 {
23200 arm_ccfsm_state = 1;
23201 succeed = TRUE;
23202 }
23203 else
23204 fail = TRUE;
23205 break;
23206
23207 case BARRIER:
23208 /* Succeed if the following insn is the target label.
23209 Otherwise fail.
23210 If return insns are used then the last insn in a function
23211 will be a barrier. */
23212 this_insn = next_nonnote_insn (this_insn);
23213 if (this_insn && this_insn == label)
23214 {
23215 arm_ccfsm_state = 1;
23216 succeed = TRUE;
23217 }
23218 else
23219 fail = TRUE;
23220 break;
23221
23222 case CALL_INSN:
23223 /* The AAPCS says that conditional calls should not be
23224 used since they make interworking inefficient (the
23225 linker can't transform BL<cond> into BLX). That's
23226 only a problem if the machine has BLX. */
23227 if (arm_arch5)
23228 {
23229 fail = TRUE;
23230 break;
23231 }
23232
23233 /* Succeed if the following insn is the target label, or
23234 if the following two insns are a barrier and the
23235 target label. */
23236 this_insn = next_nonnote_insn (this_insn);
23237 if (this_insn && BARRIER_P (this_insn))
23238 this_insn = next_nonnote_insn (this_insn);
23239
23240 if (this_insn && this_insn == label
23241 && insns_skipped < max_insns_skipped)
23242 {
23243 arm_ccfsm_state = 1;
23244 succeed = TRUE;
23245 }
23246 else
23247 fail = TRUE;
23248 break;
23249
23250 case JUMP_INSN:
23251 /* If this is an unconditional branch to the same label, succeed.
23252 If it is to another label, do nothing. If it is conditional,
23253 fail. */
23254 /* XXX Probably, the tests for SET and the PC are
23255 unnecessary. */
23256
23257 scanbody = PATTERN (this_insn);
23258 if (GET_CODE (scanbody) == SET
23259 && GET_CODE (SET_DEST (scanbody)) == PC)
23260 {
23261 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23262 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23263 {
23264 arm_ccfsm_state = 2;
23265 succeed = TRUE;
23266 }
23267 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23268 fail = TRUE;
23269 }
23270 /* Fail if a conditional return is undesirable (e.g. on a
23271 StrongARM), but still allow this if optimizing for size. */
23272 else if (GET_CODE (scanbody) == return_code
23273 && !use_return_insn (TRUE, NULL)
23274 && !optimize_size)
23275 fail = TRUE;
23276 else if (GET_CODE (scanbody) == return_code)
23277 {
23278 arm_ccfsm_state = 2;
23279 succeed = TRUE;
23280 }
23281 else if (GET_CODE (scanbody) == PARALLEL)
23282 {
23283 switch (get_attr_conds (this_insn))
23284 {
23285 case CONDS_NOCOND:
23286 break;
23287 default:
23288 fail = TRUE;
23289 break;
23290 }
23291 }
23292 else
23293 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23294
23295 break;
23296
23297 case INSN:
23298 /* Instructions using or affecting the condition codes make it
23299 fail. */
23300 scanbody = PATTERN (this_insn);
23301 if (!(GET_CODE (scanbody) == SET
23302 || GET_CODE (scanbody) == PARALLEL)
23303 || get_attr_conds (this_insn) != CONDS_NOCOND)
23304 fail = TRUE;
23305 break;
23306
23307 default:
23308 break;
23309 }
23310 }
23311 if (succeed)
23312 {
23313 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23314 arm_target_label = CODE_LABEL_NUMBER (label);
23315 else
23316 {
23317 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23318
23319 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23320 {
23321 this_insn = next_nonnote_insn (this_insn);
23322 gcc_assert (!this_insn
23323 || (!BARRIER_P (this_insn)
23324 && !LABEL_P (this_insn)));
23325 }
23326 if (!this_insn)
23327 {
23328 /* Oh, dear! We ran off the end; give up. */
23329 extract_constrain_insn_cached (insn);
23330 arm_ccfsm_state = 0;
23331 arm_target_insn = NULL;
23332 return;
23333 }
23334 arm_target_insn = this_insn;
23335 }
23336
23337 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23338 what it was. */
23339 if (!reverse)
23340 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23341
23342 if (reverse || then_not_else)
23343 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23344 }
23345
23346 /* Restore recog_data (getting the attributes of other insns can
23347 destroy this array, but final.c assumes that it remains intact
23348 across this call). */
23349 extract_constrain_insn_cached (insn);
23350 }
23351 }
23352
23353 /* Output IT instructions. */
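/* For example (illustrative): three conditional instructions, two
   under EQ followed by one under NE, give arm_condexec_mask == 0b011
   and arm_condexec_masklen == 3, so the loop below builds "tte" and
   this function emits "itte eq" before the first instruction.  */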
23354 void
23355 thumb2_asm_output_opcode (FILE * stream)
23356 {
23357 char buff[5];
23358 int n;
23359
23360 if (arm_condexec_mask)
23361 {
23362 for (n = 0; n < arm_condexec_masklen; n++)
23363 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23364 buff[n] = 0;
23365 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23366 arm_condition_codes[arm_current_cc]);
23367 arm_condexec_mask = 0;
23368 }
23369 }
23370
23371 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23372 UNITS_PER_WORD bytes wide. */
23373 static unsigned int
23374 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23375 {
23376 if (TARGET_32BIT
23377 && regno > PC_REGNUM
23378 && regno != FRAME_POINTER_REGNUM
23379 && regno != ARG_POINTER_REGNUM
23380 && !IS_VFP_REGNUM (regno))
23381 return 1;
23382
23383 return ARM_NUM_REGS (mode);
23384 }
23385
23386 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23387 static bool
23388 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23389 {
23390 if (GET_MODE_CLASS (mode) == MODE_CC)
23391 return (regno == CC_REGNUM
23392 || (TARGET_HARD_FLOAT
23393 && regno == VFPCC_REGNUM));
23394
23395 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23396 return false;
23397
23398 if (TARGET_THUMB1)
23399 /* For the Thumb we only allow values bigger than SImode in
23400 registers 0 - 6, so that there is always a second low
23401 register available to hold the upper part of the value.
23402 We probably ought to ensure that the register is the
23403 start of an even numbered register pair. */
23404 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23405
23406 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23407 {
23408 if (mode == SFmode || mode == SImode)
23409 return VFP_REGNO_OK_FOR_SINGLE (regno);
23410
23411 if (mode == DFmode)
23412 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23413
23414 if (mode == HFmode)
23415 return VFP_REGNO_OK_FOR_SINGLE (regno);
23416
23417 /* VFP registers can hold HImode values. */
23418 if (mode == HImode)
23419 return VFP_REGNO_OK_FOR_SINGLE (regno);
23420
23421 if (TARGET_NEON)
23422 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23423 || (VALID_NEON_QREG_MODE (mode)
23424 && NEON_REGNO_OK_FOR_QUAD (regno))
23425 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23426 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23427 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23428 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23429 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23430
23431 return false;
23432 }
23433
23434 if (TARGET_REALLY_IWMMXT)
23435 {
23436 if (IS_IWMMXT_GR_REGNUM (regno))
23437 return mode == SImode;
23438
23439 if (IS_IWMMXT_REGNUM (regno))
23440 return VALID_IWMMXT_REG_MODE (mode);
23441 }
23442
23443 /* We allow almost any value to be stored in the general registers.
23444 Restrict doubleword quantities to even register pairs in ARM state
23445 so that we can use ldrd. Do not allow very large Neon structure
23446 opaque modes in general registers; they would use too many. */
23447 if (regno <= LAST_ARM_REGNUM)
23448 {
23449 if (ARM_NUM_REGS (mode) > 4)
23450 return false;
23451
23452 if (TARGET_THUMB2)
23453 return true;
23454
23455 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23456 }
23457
23458 if (regno == FRAME_POINTER_REGNUM
23459 || regno == ARG_POINTER_REGNUM)
23460 /* We only allow integers in the fake hard registers. */
23461 return GET_MODE_CLASS (mode) == MODE_INT;
23462
23463 return false;
23464 }
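/* Some illustrative consequences of the rules above (not exhaustive): in ARM
   state with LDRD available, DImode is rejected in odd-numbered core
   registers so that ldrd/strd can be used; in Thumb-2 any mode needing at
   most four core registers is accepted in any core register; and a NEON
   Q-register mode such as V4SImode is only accepted in VFP register numbers
   satisfying NEON_REGNO_OK_FOR_QUAD.  */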
23465
23466 /* Implement TARGET_MODES_TIEABLE_P. */
23467
23468 static bool
23469 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23470 {
23471 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23472 return true;
23473
23474 /* We specifically want to allow elements of "structure" modes to
23475 be tieable to the structure. This more general condition allows
23476 other rarer situations too. */
23477 if (TARGET_NEON
23478 && (VALID_NEON_DREG_MODE (mode1)
23479 || VALID_NEON_QREG_MODE (mode1)
23480 || VALID_NEON_STRUCT_MODE (mode1))
23481 && (VALID_NEON_DREG_MODE (mode2)
23482 || VALID_NEON_QREG_MODE (mode2)
23483 || VALID_NEON_STRUCT_MODE (mode2)))
23484 return true;
23485
23486 return false;
23487 }
23488
23489 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23490 not used in arm mode. */
23491
23492 enum reg_class
23493 arm_regno_class (int regno)
23494 {
23495 if (regno == PC_REGNUM)
23496 return NO_REGS;
23497
23498 if (TARGET_THUMB1)
23499 {
23500 if (regno == STACK_POINTER_REGNUM)
23501 return STACK_REG;
23502 if (regno == CC_REGNUM)
23503 return CC_REG;
23504 if (regno < 8)
23505 return LO_REGS;
23506 return HI_REGS;
23507 }
23508
23509 if (TARGET_THUMB2 && regno < 8)
23510 return LO_REGS;
23511
23512 if ( regno <= LAST_ARM_REGNUM
23513 || regno == FRAME_POINTER_REGNUM
23514 || regno == ARG_POINTER_REGNUM)
23515 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23516
23517 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23518 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23519
23520 if (IS_VFP_REGNUM (regno))
23521 {
23522 if (regno <= D7_VFP_REGNUM)
23523 return VFP_D0_D7_REGS;
23524 else if (regno <= LAST_LO_VFP_REGNUM)
23525 return VFP_LO_REGS;
23526 else
23527 return VFP_HI_REGS;
23528 }
23529
23530 if (IS_IWMMXT_REGNUM (regno))
23531 return IWMMXT_REGS;
23532
23533 if (IS_IWMMXT_GR_REGNUM (regno))
23534 return IWMMXT_GR_REGS;
23535
23536 return NO_REGS;
23537 }
23538
23539 /* Handle a special case when computing the offset
23540 of an argument from the frame pointer. */
23541 int
23542 arm_debugger_arg_offset (int value, rtx addr)
23543 {
23544 rtx_insn *insn;
23545
23546 /* We are only interested if dbxout_parms() failed to compute the offset. */
23547 if (value != 0)
23548 return 0;
23549
23550 /* We can only cope with the case where the address is held in a register. */
23551 if (!REG_P (addr))
23552 return 0;
23553
23554 /* If we are using the frame pointer to point at the argument, then
23555 an offset of 0 is correct. */
23556 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23557 return 0;
23558
23559 /* If we are using the stack pointer to point at the
23560 argument, then an offset of 0 is correct. */
23561 /* ??? Check this is consistent with thumb2 frame layout. */
23562 if ((TARGET_THUMB || !frame_pointer_needed)
23563 && REGNO (addr) == SP_REGNUM)
23564 return 0;
23565
23566 /* Oh dear. The argument is pointed to by a register rather
23567 than being held in a register, or being stored at a known
23568 offset from the frame pointer. Since GDB only understands
23569 those two kinds of argument we must translate the address
23570 held in the register into an offset from the frame pointer.
23571 We do this by searching through the insns for the function
23572 looking to see where this register gets its value. If the
23573 register is initialized from the frame pointer plus an offset
23574 then we are in luck and we can continue, otherwise we give up.
23575
23576 This code is exercised by producing debugging information
23577 for a function with arguments like this:
23578
23579 double func (double a, double b, int c, double d) {return d;}
23580
23581 Without this code the stab for parameter 'd' will be set to
23582 an offset of 0 from the frame pointer, rather than 8. */
23583
23584 /* The if() statement says:
23585
23586 If the insn is a normal instruction
23587 and if the insn is setting the value in a register
23588 and if the register being set is the register holding the address of the argument
23589 and if the address is computed by an addition
23590 that involves adding to a register
23591 which is the frame pointer
23592 a constant integer
23593
23594 then... */
23595
23596 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23597 {
23598 if ( NONJUMP_INSN_P (insn)
23599 && GET_CODE (PATTERN (insn)) == SET
23600 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23601 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23602 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23603 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23604 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23605 )
23606 {
23607 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23608
23609 break;
23610 }
23611 }
23612
23613 if (value == 0)
23614 {
23615 debug_rtx (addr);
23616 warning (0, "unable to compute real location of stacked parameter");
23617 value = 8; /* XXX magic hack */
23618 }
23619
23620 return value;
23621 }
23622 \f
23623 /* Implement TARGET_PROMOTED_TYPE. */
23624
23625 static tree
23626 arm_promoted_type (const_tree t)
23627 {
23628 if (SCALAR_FLOAT_TYPE_P (t)
23629 && TYPE_PRECISION (t) == 16
23630 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23631 return float_type_node;
23632 return NULL_TREE;
23633 }
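/* A minimal usage sketch (assumes a 16-bit __fp16 format has been selected,
   e.g. IEEE): because __fp16 is promoted to float here, the addition below
   is carried out in single precision and narrowed again on the store:
       __fp16 a, b, c;
       c = a + b;        /+ evaluated as (float) a + (float) b +/
   _Float16, when available, is not promoted here; it is handled by the
   excess-precision logic below.  */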
23634
23635 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23636 This simply adds HFmode as a supported mode; even though we don't
23637 implement arithmetic on this type directly, it's supported by
23638 optabs conversions, much the way the double-word arithmetic is
23639 special-cased in the default hook. */
23640
23641 static bool
23642 arm_scalar_mode_supported_p (scalar_mode mode)
23643 {
23644 if (mode == HFmode)
23645 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23646 else if (ALL_FIXED_POINT_MODE_P (mode))
23647 return true;
23648 else
23649 return default_scalar_mode_supported_p (mode);
23650 }
23651
23652 /* Set the value of FLT_EVAL_METHOD.
23653 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23654
23655 0: evaluate all operations and constants, whose semantic type has at
23656 most the range and precision of type float, to the range and
23657 precision of float; evaluate all other operations and constants to
23658 the range and precision of the semantic type;
23659
23660 N, where _FloatN is a supported interchange floating type:
23661 evaluate all operations and constants, whose semantic type has at
23662 most the range and precision of _FloatN type, to the range and
23663 precision of the _FloatN type; evaluate all other operations and
23664 constants to the range and precision of the semantic type;
23665
23666 If we have the ARMv8.2-A extensions then we support _Float16 in native
23667 precision, so we should set this to 16. Otherwise, we support the type,
23668 but want to evaluate expressions in float precision, so set this to
23669 0. */
23670
23671 static enum flt_eval_method
23672 arm_excess_precision (enum excess_precision_type type)
23673 {
23674 switch (type)
23675 {
23676 case EXCESS_PRECISION_TYPE_FAST:
23677 case EXCESS_PRECISION_TYPE_STANDARD:
23678 /* We can calculate either in 16-bit range and precision or
23679 32-bit range and precision. Make that decision based on whether
23680 we have native support for the ARMv8.2-A 16-bit floating-point
23681 instructions or not. */
23682 return (TARGET_VFP_FP16INST
23683 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23684 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23685 case EXCESS_PRECISION_TYPE_IMPLICIT:
23686 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23687 default:
23688 gcc_unreachable ();
23689 }
23690 return FLT_EVAL_METHOD_UNPREDICTABLE;
23691 }
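/* Worked example (illustrative): with the ARMv8.2-A FP16 extension enabled
   (TARGET_VFP_FP16INST), code such as
       _Float16 x, y;
       float r = x * y;
   can be evaluated directly in half precision (FLT_EVAL_METHOD == 16);
   without the extension the operands are promoted and the multiplication is
   performed as float (FLT_EVAL_METHOD == 0), as described above.  */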
23692
23693
23694 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23695 _Float16 if we are using anything other than ieee format for 16-bit
23696 floating point. Otherwise, punt to the default implementation. */
23697 static opt_scalar_float_mode
23698 arm_floatn_mode (int n, bool extended)
23699 {
23700 if (!extended && n == 16)
23701 {
23702 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23703 return HFmode;
23704 return opt_scalar_float_mode ();
23705 }
23706
23707 return default_floatn_mode (n, extended);
23708 }
23709
23710
23711 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23712 not to early-clobber SRC registers in the process.
23713
23714 We assume that the operands described by SRC and DEST represent a
23715 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23716 number of components into which the copy has been decomposed. */
23717 void
23718 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23719 {
23720 unsigned int i;
23721
23722 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23723 || REGNO (operands[0]) < REGNO (operands[1]))
23724 {
23725 for (i = 0; i < count; i++)
23726 {
23727 operands[2 * i] = dest[i];
23728 operands[2 * i + 1] = src[i];
23729 }
23730 }
23731 else
23732 {
23733 for (i = 0; i < count; i++)
23734 {
23735 operands[2 * i] = dest[count - i - 1];
23736 operands[2 * i + 1] = src[count - i - 1];
23737 }
23738 }
23739 }
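/* Worked example (register numbers invented for illustration): decomposing a
   copy whose pieces are dest = { d3, d4 } and src = { d2, d3 }. The operands
   overlap and REGNO (operands[0]) > REGNO (operands[1]), so the pairs are
   emitted in reverse order:
       operands[0] = d4, operands[1] = d3   -- high part copied first
       operands[2] = d3, operands[3] = d2   -- d3 only overwritten after it
                                               has been read
   In the non-overlapping (or downward-copy) case the pairs keep their
   original order.  */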
23740
23741 /* Split operands into moves from op[1] + op[2] into op[0]. */
23742
23743 void
23744 neon_split_vcombine (rtx operands[3])
23745 {
23746 unsigned int dest = REGNO (operands[0]);
23747 unsigned int src1 = REGNO (operands[1]);
23748 unsigned int src2 = REGNO (operands[2]);
23749 machine_mode halfmode = GET_MODE (operands[1]);
23750 unsigned int halfregs = REG_NREGS (operands[1]);
23751 rtx destlo, desthi;
23752
23753 if (src1 == dest && src2 == dest + halfregs)
23754 {
23755 /* No-op move. Can't split to nothing; emit something. */
23756 emit_note (NOTE_INSN_DELETED);
23757 return;
23758 }
23759
23760 /* Preserve register attributes for variable tracking. */
23761 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23762 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23763 GET_MODE_SIZE (halfmode));
23764
23765 /* Special case of reversed high/low parts. Use VSWP. */
23766 if (src2 == dest && src1 == dest + halfregs)
23767 {
23768 rtx x = gen_rtx_SET (destlo, operands[1]);
23769 rtx y = gen_rtx_SET (desthi, operands[2]);
23770 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23771 return;
23772 }
23773
23774 if (!reg_overlap_mentioned_p (operands[2], destlo))
23775 {
23776 /* Try to avoid unnecessary moves if part of the result
23777 is in the right place already. */
23778 if (src1 != dest)
23779 emit_move_insn (destlo, operands[1]);
23780 if (src2 != dest + halfregs)
23781 emit_move_insn (desthi, operands[2]);
23782 }
23783 else
23784 {
23785 if (src2 != dest + halfregs)
23786 emit_move_insn (desthi, operands[2]);
23787 if (src1 != dest)
23788 emit_move_insn (destlo, operands[1]);
23789 }
23790 }
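/* Illustrative cases (Q/D register numbers invented):
     q0 = vcombine (d0, d1)  -- both halves already in place: only a
                                deleted-insn note is emitted, since a split
                                may not expand to nothing.
     q0 = vcombine (d1, d0)  -- halves reversed: one PARALLEL of two SETs is
                                emitted so a register-swap (VSWP) pattern can
                                match.
     q0 = vcombine (d4, d1)  -- only the low half needs a move; the order of
                                the two moves is chosen so that no source is
                                clobbered before it is read.  */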
23791 \f
23792 /* Return the number (counting from 0) of
23793 the least significant set bit in MASK. */
23794
23795 inline static int
23796 number_of_first_bit_set (unsigned mask)
23797 {
23798 return ctz_hwi (mask);
23799 }
23800
23801 /* Like emit_multi_reg_push, but allowing for a different set of
23802 registers to be described as saved. MASK is the set of registers
23803 to be saved; REAL_REGS is the set of registers to be described as
23804 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23805
23806 static rtx_insn *
23807 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23808 {
23809 unsigned long regno;
23810 rtx par[10], tmp, reg;
23811 rtx_insn *insn;
23812 int i, j;
23813
23814 /* Build the parallel of the registers actually being stored. */
23815 for (i = 0; mask; ++i, mask &= mask - 1)
23816 {
23817 regno = ctz_hwi (mask);
23818 reg = gen_rtx_REG (SImode, regno);
23819
23820 if (i == 0)
23821 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23822 else
23823 tmp = gen_rtx_USE (VOIDmode, reg);
23824
23825 par[i] = tmp;
23826 }
23827
23828 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23829 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23830 tmp = gen_frame_mem (BLKmode, tmp);
23831 tmp = gen_rtx_SET (tmp, par[0]);
23832 par[0] = tmp;
23833
23834 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23835 insn = emit_insn (tmp);
23836
23837 /* Always build the stack adjustment note for unwind info. */
23838 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23839 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23840 par[0] = tmp;
23841
23842 /* Build the parallel of the registers recorded as saved for unwind. */
23843 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23844 {
23845 regno = ctz_hwi (real_regs);
23846 reg = gen_rtx_REG (SImode, regno);
23847
23848 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23849 tmp = gen_frame_mem (SImode, tmp);
23850 tmp = gen_rtx_SET (tmp, reg);
23851 RTX_FRAME_RELATED_P (tmp) = 1;
23852 par[j + 1] = tmp;
23853 }
23854
23855 if (j == 0)
23856 tmp = par[0];
23857 else
23858 {
23859 RTX_FRAME_RELATED_P (par[0]) = 1;
23860 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23861 }
23862
23863 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23864
23865 return insn;
23866 }
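/* Sketch of the RTL produced (illustrative): for MASK = {r4, r5, lr} the
   emitted insn looks roughly like
       (parallel [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) -12)))
                       (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
                  (use (reg r5))
                  (use (reg lr))])
   while the REG_FRAME_RELATED_EXPR note describes the same saves in terms of
   REAL_REGS together with an explicit stack-pointer adjustment for the
   unwinder.  */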
23867
23868 /* Emit code to push or pop registers to or from the stack. F is the
23869 assembly file. MASK is the registers to pop. */
23870 static void
23871 thumb_pop (FILE *f, unsigned long mask)
23872 {
23873 int regno;
23874 int lo_mask = mask & 0xFF;
23875
23876 gcc_assert (mask);
23877
23878 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23879 {
23880 /* Special case. Do not generate a POP PC statement here; do it in
23881 thumb_exit(). */
23882 thumb_exit (f, -1);
23883 return;
23884 }
23885
23886 fprintf (f, "\tpop\t{");
23887
23888 /* Look at the low registers first. */
23889 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23890 {
23891 if (lo_mask & 1)
23892 {
23893 asm_fprintf (f, "%r", regno);
23894
23895 if ((lo_mask & ~1) != 0)
23896 fprintf (f, ", ");
23897 }
23898 }
23899
23900 if (mask & (1 << PC_REGNUM))
23901 {
23902 /* Catch popping the PC. */
23903 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23904 || IS_CMSE_ENTRY (arm_current_func_type ()))
23905 {
23906 /* The PC is never popped directly; instead
23907 it is popped into r3 and then BX is used. */
23908 fprintf (f, "}\n");
23909
23910 thumb_exit (f, -1);
23911
23912 return;
23913 }
23914 else
23915 {
23916 if (mask & 0xFF)
23917 fprintf (f, ", ");
23918
23919 asm_fprintf (f, "%r", PC_REGNUM);
23920 }
23921 }
23922
23923 fprintf (f, "}\n");
23924 }
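/* Example of the output (illustrative): for MASK = {r4, r5, pc} on a target
   without interworking, backtrace or CMSE requirements this prints
       pop   {r4, r5, pc}
   whereas with interworking the PC is never popped directly; the low
   registers are popped and thumb_exit emits the final BX sequence.  */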
23925
23926 /* Generate code to return from a thumb function.
23927 If 'reg_containing_return_addr' is -1, then the return address is
23928 actually on the stack, at the stack pointer. */
23929 static void
23930 thumb_exit (FILE *f, int reg_containing_return_addr)
23931 {
23932 unsigned regs_available_for_popping;
23933 unsigned regs_to_pop;
23934 int pops_needed;
23935 unsigned available;
23936 unsigned required;
23937 machine_mode mode;
23938 int size;
23939 int restore_a4 = FALSE;
23940
23941 /* Compute the registers we need to pop. */
23942 regs_to_pop = 0;
23943 pops_needed = 0;
23944
23945 if (reg_containing_return_addr == -1)
23946 {
23947 regs_to_pop |= 1 << LR_REGNUM;
23948 ++pops_needed;
23949 }
23950
23951 if (TARGET_BACKTRACE)
23952 {
23953 /* Restore the (ARM) frame pointer and stack pointer. */
23954 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23955 pops_needed += 2;
23956 }
23957
23958 /* If there is nothing to pop then just emit the BX instruction and
23959 return. */
23960 if (pops_needed == 0)
23961 {
23962 if (crtl->calls_eh_return)
23963 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23964
23965 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23966 {
23967 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23968 reg_containing_return_addr);
23969 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23970 }
23971 else
23972 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23973 return;
23974 }
23975 /* Otherwise if we are not supporting interworking and we have not created
23976 a backtrace structure and the function was not entered in ARM mode then
23977 just pop the return address straight into the PC. */
23978 else if (!TARGET_INTERWORK
23979 && !TARGET_BACKTRACE
23980 && !is_called_in_ARM_mode (current_function_decl)
23981 && !crtl->calls_eh_return
23982 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23983 {
23984 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23985 return;
23986 }
23987
23988 /* Find out how many of the (return) argument registers we can corrupt. */
23989 regs_available_for_popping = 0;
23990
23991 /* If returning via __builtin_eh_return, the bottom three registers
23992 all contain information needed for the return. */
23993 if (crtl->calls_eh_return)
23994 size = 12;
23995 else
23996 {
23997 /* See if we can deduce the registers used from the function's
23998 return value. This is more reliable than examining
23999 df_regs_ever_live_p () because that will be set if the register is
24000 ever used in the function, not just if the register is used
24001 to hold a return value. */
24002
24003 if (crtl->return_rtx != 0)
24004 mode = GET_MODE (crtl->return_rtx);
24005 else
24006 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24007
24008 size = GET_MODE_SIZE (mode);
24009
24010 if (size == 0)
24011 {
24012 /* In a void function we can use any argument register.
24013 In a function that returns a structure on the stack
24014 we can use the second and third argument registers. */
24015 if (mode == VOIDmode)
24016 regs_available_for_popping =
24017 (1 << ARG_REGISTER (1))
24018 | (1 << ARG_REGISTER (2))
24019 | (1 << ARG_REGISTER (3));
24020 else
24021 regs_available_for_popping =
24022 (1 << ARG_REGISTER (2))
24023 | (1 << ARG_REGISTER (3));
24024 }
24025 else if (size <= 4)
24026 regs_available_for_popping =
24027 (1 << ARG_REGISTER (2))
24028 | (1 << ARG_REGISTER (3));
24029 else if (size <= 8)
24030 regs_available_for_popping =
24031 (1 << ARG_REGISTER (3));
24032 }
24033
24034 /* Match registers to be popped with registers into which we pop them. */
24035 for (available = regs_available_for_popping,
24036 required = regs_to_pop;
24037 required != 0 && available != 0;
24038 available &= ~(available & - available),
24039 required &= ~(required & - required))
24040 -- pops_needed;
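/* Worked example of the bit manipulation above (illustrative): for a mask m,
   m & -m isolates the lowest set bit, so m &= ~(m & -m) clears it:
       m = 0x6 (binary 0110):  m & -m == 0x2,  m becomes 0x4 (binary 0100)
   Each loop iteration therefore pairs one register to pop with one register
   to receive it, decrementing pops_needed once per pair.  */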
24041
24042 /* If we have any popping registers left over, remove them. */
24043 if (available > 0)
24044 regs_available_for_popping &= ~available;
24045
24046 /* Otherwise if we need another popping register we can use
24047 the fourth argument register. */
24048 else if (pops_needed)
24049 {
24050 /* If we have not found any free argument registers and
24051 reg a4 contains the return address, we must move it. */
24052 if (regs_available_for_popping == 0
24053 && reg_containing_return_addr == LAST_ARG_REGNUM)
24054 {
24055 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24056 reg_containing_return_addr = LR_REGNUM;
24057 }
24058 else if (size > 12)
24059 {
24060 /* Register a4 is being used to hold part of the return value,
24061 but we have dire need of a free, low register. */
24062 restore_a4 = TRUE;
24063
24064 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24065 }
24066
24067 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24068 {
24069 /* The fourth argument register is available. */
24070 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24071
24072 --pops_needed;
24073 }
24074 }
24075
24076 /* Pop as many registers as we can. */
24077 thumb_pop (f, regs_available_for_popping);
24078
24079 /* Process the registers we popped. */
24080 if (reg_containing_return_addr == -1)
24081 {
24082 /* The return address was popped into the lowest numbered register. */
24083 regs_to_pop &= ~(1 << LR_REGNUM);
24084
24085 reg_containing_return_addr =
24086 number_of_first_bit_set (regs_available_for_popping);
24087
24088 /* Remove this register from the mask of available registers, so that
24089 the return address will not be corrupted by further pops. */
24090 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24091 }
24092
24093 /* If we popped other registers then handle them here. */
24094 if (regs_available_for_popping)
24095 {
24096 int frame_pointer;
24097
24098 /* Work out which register currently contains the frame pointer. */
24099 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24100
24101 /* Move it into the correct place. */
24102 asm_fprintf (f, "\tmov\t%r, %r\n",
24103 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24104
24105 /* (Temporarily) remove it from the mask of popped registers. */
24106 regs_available_for_popping &= ~(1 << frame_pointer);
24107 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24108
24109 if (regs_available_for_popping)
24110 {
24111 int stack_pointer;
24112
24113 /* We popped the stack pointer as well;
24114 find the register that contains it. */
24115 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24116
24117 /* Move it into the stack register. */
24118 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24119
24120 /* At this point we have popped all necessary registers, so
24121 do not worry about restoring regs_available_for_popping
24122 to its correct value:
24123
24124 assert (pops_needed == 0)
24125 assert (regs_available_for_popping == (1 << frame_pointer))
24126 assert (regs_to_pop == (1 << STACK_POINTER)) */
24127 }
24128 else
24129 {
24130 /* Since we have just moved the popped value into the frame
24131 pointer, the popping register is available for reuse, and
24132 we know that we still have the stack pointer left to pop. */
24133 regs_available_for_popping |= (1 << frame_pointer);
24134 }
24135 }
24136
24137 /* If we still have registers left on the stack, but we no longer have
24138 any registers into which we can pop them, then we must move the return
24139 address into the link register and make available the register that
24140 contained it. */
24141 if (regs_available_for_popping == 0 && pops_needed > 0)
24142 {
24143 regs_available_for_popping |= 1 << reg_containing_return_addr;
24144
24145 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24146 reg_containing_return_addr);
24147
24148 reg_containing_return_addr = LR_REGNUM;
24149 }
24150
24151 /* If we have registers left on the stack then pop some more.
24152 We know that at most we will want to pop FP and SP. */
24153 if (pops_needed > 0)
24154 {
24155 int popped_into;
24156 int move_to;
24157
24158 thumb_pop (f, regs_available_for_popping);
24159
24160 /* We have popped either FP or SP.
24161 Move whichever one it is into the correct register. */
24162 popped_into = number_of_first_bit_set (regs_available_for_popping);
24163 move_to = number_of_first_bit_set (regs_to_pop);
24164
24165 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24166 --pops_needed;
24167 }
24168
24169 /* If we still have not popped everything then we must have only
24170 had one register available to us and we are now popping the SP. */
24171 if (pops_needed > 0)
24172 {
24173 int popped_into;
24174
24175 thumb_pop (f, regs_available_for_popping);
24176
24177 popped_into = number_of_first_bit_set (regs_available_for_popping);
24178
24179 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24180 /*
24181 assert (regs_to_pop == (1 << STACK_POINTER))
24182 assert (pops_needed == 1)
24183 */
24184 }
24185
24186 /* If necessary restore the a4 register. */
24187 if (restore_a4)
24188 {
24189 if (reg_containing_return_addr != LR_REGNUM)
24190 {
24191 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24192 reg_containing_return_addr = LR_REGNUM;
24193 }
24194
24195 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24196 }
24197
24198 if (crtl->calls_eh_return)
24199 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24200
24201 /* Return to caller. */
24202 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24203 {
24204 /* This is for the cases where LR is not being used to contain the return
24205 address. It may therefore contain information that we might not want
24206 to leak, hence it must be cleared. The value in R0 will never be a
24207 secret at this point, so it is safe to use it, see the clearing code
24208 in 'cmse_nonsecure_entry_clear_before_return'. */
24209 if (reg_containing_return_addr != LR_REGNUM)
24210 asm_fprintf (f, "\tmov\tlr, r0\n");
24211
24212 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24213 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24214 }
24215 else
24216 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24217 }
24218 \f
24219 /* Scan INSN just before assembler is output for it.
24220 For Thumb-1, we track the status of the condition codes; this
24221 information is used in the cbranchsi4_insn pattern. */
24222 void
24223 thumb1_final_prescan_insn (rtx_insn *insn)
24224 {
24225 if (flag_print_asm_name)
24226 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24227 INSN_ADDRESSES (INSN_UID (insn)));
24228 /* Don't overwrite the previous setter when we get to a cbranch. */
24229 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24230 {
24231 enum attr_conds conds;
24232
24233 if (cfun->machine->thumb1_cc_insn)
24234 {
24235 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24236 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24237 CC_STATUS_INIT;
24238 }
24239 conds = get_attr_conds (insn);
24240 if (conds == CONDS_SET)
24241 {
24242 rtx set = single_set (insn);
24243 cfun->machine->thumb1_cc_insn = insn;
24244 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24245 cfun->machine->thumb1_cc_op1 = const0_rtx;
24246 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24247 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24248 {
24249 rtx src1 = XEXP (SET_SRC (set), 1);
24250 if (src1 == const0_rtx)
24251 cfun->machine->thumb1_cc_mode = CCmode;
24252 }
24253 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24254 {
24255 /* Record the src register operand instead of dest because
24256 cprop_hardreg pass propagates src. */
24257 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24258 }
24259 }
24260 else if (conds != CONDS_NOCOND)
24261 cfun->machine->thumb1_cc_insn = NULL_RTX;
24262 }
24263
24264 /* Check if an unexpected far jump is used. */
24265 if (cfun->machine->lr_save_eliminated
24266 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24267 internal_error ("Unexpected thumb1 far jump");
24268 }
24269
24270 int
24271 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24272 {
24273 unsigned HOST_WIDE_INT mask = 0xff;
24274 int i;
24275
24276 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24277 if (val == 0) /* XXX */
24278 return 0;
24279
24280 for (i = 0; i < 25; i++)
24281 if ((val & (mask << i)) == val)
24282 return 1;
24283
24284 return 0;
24285 }
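/* Worked examples (illustrative): the loop above asks whether VAL is an
   8-bit constant shifted left by 0..24 bits, i.e. something a Thumb-1 MOV of
   an 8-bit immediate followed by an LSL can produce:
       0x000000ff  -> shiftable (shift 0)
       0x00ff0000  -> shiftable (shift 16)
       0x00000101  -> not shiftable (set bits span more than 8 positions)  */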
24286
24287 /* Returns nonzero if the current function contains,
24288 or might contain, a far jump. */
24289 static int
24290 thumb_far_jump_used_p (void)
24291 {
24292 rtx_insn *insn;
24293 bool far_jump = false;
24294 unsigned int func_size = 0;
24295
24296 /* If we have already decided that far jumps may be used,
24297 do not bother checking again, and always return true even if
24298 it turns out that they are not being used. Once we have made
24299 the decision that far jumps are present (and that hence the link
24300 register will be pushed onto the stack) we cannot go back on it. */
24301 if (cfun->machine->far_jump_used)
24302 return 1;
24303
24304 /* If this function is not being called from the prologue/epilogue
24305 generation code then it must be being called from the
24306 INITIAL_ELIMINATION_OFFSET macro. */
24307 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24308 {
24309 /* In this case we know that we are being asked about the elimination
24310 of the arg pointer register. If that register is not being used,
24311 then there are no arguments on the stack, and we do not have to
24312 worry that a far jump might force the prologue to push the link
24313 register, changing the stack offsets. In this case we can just
24314 return false, since the presence of far jumps in the function will
24315 not affect stack offsets.
24316
24317 If the arg pointer is live (or if it was live, but has now been
24318 eliminated and so set to dead) then we do have to test to see if
24319 the function might contain a far jump. This test can lead to some
24320 false negatives, since before reload is completed, the length of
24321 branch instructions is not known, so gcc defaults to returning their
24322 longest length, which in turn sets the far jump attribute to true.
24323
24324 A false negative will not result in bad code being generated, but it
24325 will result in a needless push and pop of the link register. We
24326 hope that this does not occur too often.
24327
24328 If we need doubleword stack alignment this could affect the other
24329 elimination offsets so we can't risk getting it wrong. */
24330 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24331 cfun->machine->arg_pointer_live = 1;
24332 else if (!cfun->machine->arg_pointer_live)
24333 return 0;
24334 }
24335
24336 /* We should not change far_jump_used during or after reload, as there is
24337 no chance to change stack frame layout. */
24338 if (reload_in_progress || reload_completed)
24339 return 0;
24340
24341 /* Check to see if the function contains a branch
24342 insn with the far jump attribute set. */
24343 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24344 {
24345 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24346 {
24347 far_jump = true;
24348 }
24349 func_size += get_attr_length (insn);
24350 }
24351
24352 /* The far_jump attribute will always be true for thumb1 before the
24353 shorten_branch pass, so checking the far_jump attribute before
24354 shorten_branch is not very useful.
24355
24356 The following heuristic tries to estimate more accurately whether a far
24357 jump may finally be used. The heuristic is very conservative, as there
24358 is no chance to roll back the decision not to use a far jump.
24359
24360 The Thumb1 long branch offset range is -2048 to 2046. In the worst case
24361 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24362 a function size of 2048/3 as the threshold is conservative enough. */
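/* Worked example of the threshold (illustrative): each 2-byte insn may, in
   the worst case, contribute a further 4 bytes of constant pool, so a
   function whose counted size is func_size bytes may span up to
   func_size * 3 bytes. Once func_size * 3 >= 2048 (roughly 683 bytes of
   insns) a branch could exceed the -2048..2046 range, so we conservatively
   assume a far jump and keep LR saved.  */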
24363 if (far_jump)
24364 {
24365 if ((func_size * 3) >= 2048)
24366 {
24367 /* Record the fact that we have decided that
24368 the function does use far jumps. */
24369 cfun->machine->far_jump_used = 1;
24370 return 1;
24371 }
24372 }
24373
24374 return 0;
24375 }
24376
24377 /* Return nonzero if FUNC must be entered in ARM mode. */
24378 static bool
24379 is_called_in_ARM_mode (tree func)
24380 {
24381 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24382
24383 /* Ignore the problem about functions whose address is taken. */
24384 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24385 return true;
24386
24387 #ifdef ARM_PE
24388 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24389 #else
24390 return false;
24391 #endif
24392 }
24393
24394 /* Given the stack offsets and register mask in OFFSETS, decide how
24395 many additional registers to push instead of subtracting a constant
24396 from SP. For epilogues the principle is the same except we use pop.
24397 FOR_PROLOGUE indicates which we're generating. */
24398 static int
24399 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24400 {
24401 HOST_WIDE_INT amount;
24402 unsigned long live_regs_mask = offsets->saved_regs_mask;
24403 /* Extract a mask of the ones we can give to the Thumb's push/pop
24404 instruction. */
24405 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24406 /* Then count how many other high registers will need to be pushed. */
24407 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24408 int n_free, reg_base, size;
24409
24410 if (!for_prologue && frame_pointer_needed)
24411 amount = offsets->locals_base - offsets->saved_regs;
24412 else
24413 amount = offsets->outgoing_args - offsets->saved_regs;
24414
24415 /* If the stack frame size is 512 exactly, we can save one load
24416 instruction, which should make this a win even when optimizing
24417 for speed. */
24418 if (!optimize_size && amount != 512)
24419 return 0;
24420
24421 /* Can't do this if there are high registers to push. */
24422 if (high_regs_pushed != 0)
24423 return 0;
24424
24425 /* Shouldn't do it in the prologue if no registers would normally
24426 be pushed at all. In the epilogue, also allow it if we'll have
24427 a pop insn for the PC. */
24428 if (l_mask == 0
24429 && (for_prologue
24430 || TARGET_BACKTRACE
24431 || (live_regs_mask & 1 << LR_REGNUM) == 0
24432 || TARGET_INTERWORK
24433 || crtl->args.pretend_args_size != 0))
24434 return 0;
24435
24436 /* Don't do this if thumb_expand_prologue wants to emit instructions
24437 between the push and the stack frame allocation. */
24438 if (for_prologue
24439 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24440 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24441 return 0;
24442
24443 reg_base = 0;
24444 n_free = 0;
24445 if (!for_prologue)
24446 {
24447 size = arm_size_return_regs ();
24448 reg_base = ARM_NUM_INTS (size);
24449 live_regs_mask >>= reg_base;
24450 }
24451
24452 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24453 && (for_prologue || call_used_regs[reg_base + n_free]))
24454 {
24455 live_regs_mask >>= 1;
24456 n_free++;
24457 }
24458
24459 if (n_free == 0)
24460 return 0;
24461 gcc_assert (amount / 4 * 4 == amount);
24462
24463 if (amount >= 512 && (amount - n_free * 4) < 512)
24464 return (amount - 508) / 4;
24465 if (amount <= n_free * 4)
24466 return amount / 4;
24467 return 0;
24468 }
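/* Worked example (illustrative): in a prologue with amount == 512 and three
   free low registers (n_free == 3), pushing (512 - 508) / 4 == 1 extra
   register leaves an adjustment of 508 bytes, which fits a single Thumb-1
   "sub sp, #imm"; when amount <= n_free * 4 the whole adjustment can be
   folded into the push or pop.  */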
24469
24470 /* The bits which aren't usefully expanded as rtl. */
24471 const char *
24472 thumb1_unexpanded_epilogue (void)
24473 {
24474 arm_stack_offsets *offsets;
24475 int regno;
24476 unsigned long live_regs_mask = 0;
24477 int high_regs_pushed = 0;
24478 int extra_pop;
24479 int had_to_push_lr;
24480 int size;
24481
24482 if (cfun->machine->return_used_this_function != 0)
24483 return "";
24484
24485 if (IS_NAKED (arm_current_func_type ()))
24486 return "";
24487
24488 offsets = arm_get_frame_offsets ();
24489 live_regs_mask = offsets->saved_regs_mask;
24490 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24491
24492 /* See if we can deduce the registers used from the function's return value.
24493 This is more reliable than examining df_regs_ever_live_p () because that
24494 will be set if the register is ever used in the function, not just if
24495 the register is used to hold a return value. */
24496 size = arm_size_return_regs ();
24497
24498 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24499 if (extra_pop > 0)
24500 {
24501 unsigned long extra_mask = (1 << extra_pop) - 1;
24502 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24503 }
24504
24505 /* The prolog may have pushed some high registers to use as
24506 work registers. e.g. the testsuite file:
24507 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24508 compiles to produce:
24509 push {r4, r5, r6, r7, lr}
24510 mov r7, r9
24511 mov r6, r8
24512 push {r6, r7}
24513 as part of the prolog. We have to undo that pushing here. */
24514
24515 if (high_regs_pushed)
24516 {
24517 unsigned long mask = live_regs_mask & 0xff;
24518 int next_hi_reg;
24519
24520 /* The available low registers depend on the size of the value we are
24521 returning. */
24522 if (size <= 12)
24523 mask |= 1 << 3;
24524 if (size <= 8)
24525 mask |= 1 << 2;
24526
24527 if (mask == 0)
24528 /* Oh dear! We have no low registers into which we can pop
24529 high registers! */
24530 internal_error
24531 ("no low registers available for popping high registers");
24532
24533 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24534 if (live_regs_mask & (1 << next_hi_reg))
24535 break;
24536
24537 while (high_regs_pushed)
24538 {
24539 /* Find lo register(s) into which the high register(s) can
24540 be popped. */
24541 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24542 {
24543 if (mask & (1 << regno))
24544 high_regs_pushed--;
24545 if (high_regs_pushed == 0)
24546 break;
24547 }
24548
24549 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24550
24551 /* Pop the values into the low register(s). */
24552 thumb_pop (asm_out_file, mask);
24553
24554 /* Move the value(s) into the high registers. */
24555 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24556 {
24557 if (mask & (1 << regno))
24558 {
24559 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24560 regno);
24561
24562 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24563 if (live_regs_mask & (1 << next_hi_reg))
24564 break;
24565 }
24566 }
24567 }
24568 live_regs_mask &= ~0x0f00;
24569 }
24570
24571 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24572 live_regs_mask &= 0xff;
24573
24574 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24575 {
24576 /* Pop the return address into the PC. */
24577 if (had_to_push_lr)
24578 live_regs_mask |= 1 << PC_REGNUM;
24579
24580 /* Either no argument registers were pushed or a backtrace
24581 structure was created which includes an adjusted stack
24582 pointer, so just pop everything. */
24583 if (live_regs_mask)
24584 thumb_pop (asm_out_file, live_regs_mask);
24585
24586 /* We have either just popped the return address into the
24587 PC or it was kept in LR for the entire function.
24588 Note that thumb_pop has already called thumb_exit if the
24589 PC was in the list. */
24590 if (!had_to_push_lr)
24591 thumb_exit (asm_out_file, LR_REGNUM);
24592 }
24593 else
24594 {
24595 /* Pop everything but the return address. */
24596 if (live_regs_mask)
24597 thumb_pop (asm_out_file, live_regs_mask);
24598
24599 if (had_to_push_lr)
24600 {
24601 if (size > 12)
24602 {
24603 /* We have no free low regs, so save one. */
24604 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24605 LAST_ARG_REGNUM);
24606 }
24607
24608 /* Get the return address into a temporary register. */
24609 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24610
24611 if (size > 12)
24612 {
24613 /* Move the return address to lr. */
24614 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24615 LAST_ARG_REGNUM);
24616 /* Restore the low register. */
24617 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24618 IP_REGNUM);
24619 regno = LR_REGNUM;
24620 }
24621 else
24622 regno = LAST_ARG_REGNUM;
24623 }
24624 else
24625 regno = LR_REGNUM;
24626
24627 /* Remove the argument registers that were pushed onto the stack. */
24628 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24629 SP_REGNUM, SP_REGNUM,
24630 crtl->args.pretend_args_size);
24631
24632 thumb_exit (asm_out_file, regno);
24633 }
24634
24635 return "";
24636 }
24637
24638 /* Functions to save and restore machine-specific function data. */
24639 static struct machine_function *
24640 arm_init_machine_status (void)
24641 {
24642 struct machine_function *machine;
24643 machine = ggc_cleared_alloc<machine_function> ();
24644
24645 #if ARM_FT_UNKNOWN != 0
24646 machine->func_type = ARM_FT_UNKNOWN;
24647 #endif
24648 return machine;
24649 }
24650
24651 /* Return an RTX indicating where the return address to the
24652 calling function can be found. */
24653 rtx
24654 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24655 {
24656 if (count != 0)
24657 return NULL_RTX;
24658
24659 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24660 }
24661
24662 /* Do anything needed before RTL is emitted for each function. */
24663 void
24664 arm_init_expanders (void)
24665 {
24666 /* Arrange to initialize and mark the machine per-function status. */
24667 init_machine_status = arm_init_machine_status;
24668
24669 /* This is to stop the combine pass optimizing away the alignment
24670 adjustment of va_arg. */
24671 /* ??? It is claimed that this should not be necessary. */
24672 if (cfun)
24673 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24674 }
24675
24676 /* Return true if FUNC is compiled in a different (ARM/Thumb) mode than the current function. */
24677
24678 bool
24679 arm_change_mode_p (tree func)
24680 {
24681 if (TREE_CODE (func) != FUNCTION_DECL)
24682 return false;
24683
24684 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24685
24686 if (!callee_tree)
24687 callee_tree = target_option_default_node;
24688
24689 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24690 int flags = callee_opts->x_target_flags;
24691
24692 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24693 }
24694
24695 /* Like arm_compute_initial_elimination_offset. Simpler because there
24696 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24697 to point at the base of the local variables after static stack
24698 space for a function has been allocated. */
24699
24700 HOST_WIDE_INT
24701 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24702 {
24703 arm_stack_offsets *offsets;
24704
24705 offsets = arm_get_frame_offsets ();
24706
24707 switch (from)
24708 {
24709 case ARG_POINTER_REGNUM:
24710 switch (to)
24711 {
24712 case STACK_POINTER_REGNUM:
24713 return offsets->outgoing_args - offsets->saved_args;
24714
24715 case FRAME_POINTER_REGNUM:
24716 return offsets->soft_frame - offsets->saved_args;
24717
24718 case ARM_HARD_FRAME_POINTER_REGNUM:
24719 return offsets->saved_regs - offsets->saved_args;
24720
24721 case THUMB_HARD_FRAME_POINTER_REGNUM:
24722 return offsets->locals_base - offsets->saved_args;
24723
24724 default:
24725 gcc_unreachable ();
24726 }
24727 break;
24728
24729 case FRAME_POINTER_REGNUM:
24730 switch (to)
24731 {
24732 case STACK_POINTER_REGNUM:
24733 return offsets->outgoing_args - offsets->soft_frame;
24734
24735 case ARM_HARD_FRAME_POINTER_REGNUM:
24736 return offsets->saved_regs - offsets->soft_frame;
24737
24738 case THUMB_HARD_FRAME_POINTER_REGNUM:
24739 return offsets->locals_base - offsets->soft_frame;
24740
24741 default:
24742 gcc_unreachable ();
24743 }
24744 break;
24745
24746 default:
24747 gcc_unreachable ();
24748 }
24749 }
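/* Worked example (offsets invented for illustration): with
   saved_args == 0, soft_frame == 16 and outgoing_args == 40, eliminating
   ARG_POINTER_REGNUM to STACK_POINTER_REGNUM yields 40 - 0 == 40, and
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 40 - 16 == 24; the
   remaining cases are the analogous differences of the recorded offsets.  */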
24750
24751 /* Generate the function's prologue. */
24752
24753 void
24754 thumb1_expand_prologue (void)
24755 {
24756 rtx_insn *insn;
24757
24758 HOST_WIDE_INT amount;
24759 HOST_WIDE_INT size;
24760 arm_stack_offsets *offsets;
24761 unsigned long func_type;
24762 int regno;
24763 unsigned long live_regs_mask;
24764 unsigned long l_mask;
24765 unsigned high_regs_pushed = 0;
24766 bool lr_needs_saving;
24767
24768 func_type = arm_current_func_type ();
24769
24770 /* Naked functions don't have prologues. */
24771 if (IS_NAKED (func_type))
24772 {
24773 if (flag_stack_usage_info)
24774 current_function_static_stack_size = 0;
24775 return;
24776 }
24777
24778 if (IS_INTERRUPT (func_type))
24779 {
24780 error ("interrupt Service Routines cannot be coded in Thumb mode");
24781 return;
24782 }
24783
24784 if (is_called_in_ARM_mode (current_function_decl))
24785 emit_insn (gen_prologue_thumb1_interwork ());
24786
24787 offsets = arm_get_frame_offsets ();
24788 live_regs_mask = offsets->saved_regs_mask;
24789 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24790
24791 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24792 l_mask = live_regs_mask & 0x40ff;
24793 /* Then count how many other high registers will need to be pushed. */
24794 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24795
24796 if (crtl->args.pretend_args_size)
24797 {
24798 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24799
24800 if (cfun->machine->uses_anonymous_args)
24801 {
24802 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24803 unsigned long mask;
24804
24805 mask = 1ul << (LAST_ARG_REGNUM + 1);
24806 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24807
24808 insn = thumb1_emit_multi_reg_push (mask, 0);
24809 }
24810 else
24811 {
24812 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24813 stack_pointer_rtx, x));
24814 }
24815 RTX_FRAME_RELATED_P (insn) = 1;
24816 }
24817
24818 if (TARGET_BACKTRACE)
24819 {
24820 HOST_WIDE_INT offset = 0;
24821 unsigned work_register;
24822 rtx work_reg, x, arm_hfp_rtx;
24823
24824 /* We have been asked to create a stack backtrace structure.
24825 The code looks like this:
24826
24827 0 .align 2
24828 0 func:
24829 0 sub SP, #16 Reserve space for 4 registers.
24830 2 push {R7} Push low registers.
24831 4 add R7, SP, #20 Get the stack pointer before the push.
24832 6 str R7, [SP, #8] Store the stack pointer
24833 (before reserving the space).
24834 8 mov R7, PC Get hold of the start of this code + 12.
24835 10 str R7, [SP, #16] Store it.
24836 12 mov R7, FP Get hold of the current frame pointer.
24837 14 str R7, [SP, #4] Store it.
24838 16 mov R7, LR Get hold of the current return address.
24839 18 str R7, [SP, #12] Store it.
24840 20 add R7, SP, #16 Point at the start of the
24841 backtrace structure.
24842 22 mov FP, R7 Put this value into the frame pointer. */
24843
24844 work_register = thumb_find_work_register (live_regs_mask);
24845 work_reg = gen_rtx_REG (SImode, work_register);
24846 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24847
24848 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24849 stack_pointer_rtx, GEN_INT (-16)));
24850 RTX_FRAME_RELATED_P (insn) = 1;
24851
24852 if (l_mask)
24853 {
24854 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24855 RTX_FRAME_RELATED_P (insn) = 1;
24856 lr_needs_saving = false;
24857
24858 offset = bit_count (l_mask) * UNITS_PER_WORD;
24859 }
24860
24861 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24862 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24863
24864 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24865 x = gen_frame_mem (SImode, x);
24866 emit_move_insn (x, work_reg);
24867
24868 /* Make sure that the instruction fetching the PC is in the right place
24869 to calculate "start of backtrace creation code + 12". */
24870 /* ??? The stores using the common WORK_REG ought to be enough to
24871 prevent the scheduler from doing anything weird. Failing that
24872 we could always move all of the following into an UNSPEC_VOLATILE. */
24873 if (l_mask)
24874 {
24875 x = gen_rtx_REG (SImode, PC_REGNUM);
24876 emit_move_insn (work_reg, x);
24877
24878 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24879 x = gen_frame_mem (SImode, x);
24880 emit_move_insn (x, work_reg);
24881
24882 emit_move_insn (work_reg, arm_hfp_rtx);
24883
24884 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24885 x = gen_frame_mem (SImode, x);
24886 emit_move_insn (x, work_reg);
24887 }
24888 else
24889 {
24890 emit_move_insn (work_reg, arm_hfp_rtx);
24891
24892 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24893 x = gen_frame_mem (SImode, x);
24894 emit_move_insn (x, work_reg);
24895
24896 x = gen_rtx_REG (SImode, PC_REGNUM);
24897 emit_move_insn (work_reg, x);
24898
24899 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24900 x = gen_frame_mem (SImode, x);
24901 emit_move_insn (x, work_reg);
24902 }
24903
24904 x = gen_rtx_REG (SImode, LR_REGNUM);
24905 emit_move_insn (work_reg, x);
24906
24907 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24908 x = gen_frame_mem (SImode, x);
24909 emit_move_insn (x, work_reg);
24910
24911 x = GEN_INT (offset + 12);
24912 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24913
24914 emit_move_insn (arm_hfp_rtx, work_reg);
24915 }
24916 /* Optimization: If we are not pushing any low registers but we are going
24917 to push some high registers then delay our first push. This will just
24918 be a push of LR and we can combine it with the push of the first high
24919 register. */
24920 else if ((l_mask & 0xff) != 0
24921 || (high_regs_pushed == 0 && lr_needs_saving))
24922 {
24923 unsigned long mask = l_mask;
24924 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24925 insn = thumb1_emit_multi_reg_push (mask, mask);
24926 RTX_FRAME_RELATED_P (insn) = 1;
24927 lr_needs_saving = false;
24928 }
24929
24930 if (high_regs_pushed)
24931 {
24932 unsigned pushable_regs;
24933 unsigned next_hi_reg;
24934 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24935 : crtl->args.info.nregs;
24936 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24937
24938 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24939 if (live_regs_mask & (1 << next_hi_reg))
24940 break;
24941
24942 /* Here we need to mask out registers used for passing arguments
24943 even if they can be pushed. This is to avoid using them to stash the
24944 high registers; such a stash would clobber the argument values. */
24945 pushable_regs = l_mask & (~arg_regs_mask);
24946 if (lr_needs_saving)
24947 pushable_regs &= ~(1 << LR_REGNUM);
24948
24949 if (pushable_regs == 0)
24950 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24951
24952 while (high_regs_pushed > 0)
24953 {
24954 unsigned long real_regs_mask = 0;
24955 unsigned long push_mask = 0;
24956
24957 for (regno = LR_REGNUM; regno >= 0; regno --)
24958 {
24959 if (pushable_regs & (1 << regno))
24960 {
24961 emit_move_insn (gen_rtx_REG (SImode, regno),
24962 gen_rtx_REG (SImode, next_hi_reg));
24963
24964 high_regs_pushed --;
24965 real_regs_mask |= (1 << next_hi_reg);
24966 push_mask |= (1 << regno);
24967
24968 if (high_regs_pushed)
24969 {
24970 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24971 next_hi_reg --)
24972 if (live_regs_mask & (1 << next_hi_reg))
24973 break;
24974 }
24975 else
24976 break;
24977 }
24978 }
24979
24980 /* If we had to find a work register and we have not yet
24981 saved the LR then add it to the list of regs to push. */
24982 if (lr_needs_saving)
24983 {
24984 push_mask |= 1 << LR_REGNUM;
24985 real_regs_mask |= 1 << LR_REGNUM;
24986 lr_needs_saving = false;
24987 }
24988
24989 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24990 RTX_FRAME_RELATED_P (insn) = 1;
24991 }
24992 }
24993
24994 /* Load the pic register before setting the frame pointer,
24995 so we can use r7 as a temporary work register. */
24996 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24997 arm_load_pic_register (live_regs_mask);
24998
24999 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25000 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25001 stack_pointer_rtx);
25002
25003 size = offsets->outgoing_args - offsets->saved_args;
25004 if (flag_stack_usage_info)
25005 current_function_static_stack_size = size;
25006
25007 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25008 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25009 || flag_stack_clash_protection)
25010 && size)
25011 sorry ("-fstack-check=specific for Thumb-1");
25012
25013 amount = offsets->outgoing_args - offsets->saved_regs;
25014 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25015 if (amount)
25016 {
25017 if (amount < 512)
25018 {
25019 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25020 GEN_INT (- amount)));
25021 RTX_FRAME_RELATED_P (insn) = 1;
25022 }
25023 else
25024 {
25025 rtx reg, dwarf;
25026
25027 /* The stack decrement is too big for an immediate value in a single
25028 insn. In theory we could issue multiple subtracts, but after
25029 three of them it becomes more space efficient to place the full
25030 value in the constant pool and load into a register. (Also the
25031 ARM debugger really likes to see only one stack decrement per
25032 function). So instead we look for a scratch register into which
25033 we can load the decrement, and then we subtract this from the
25034 stack pointer. Unfortunately on the thumb the only available
25035 scratch registers are the argument registers, and we cannot use
25036 these as they may hold arguments to the function. Instead we
25037 attempt to locate a call preserved register which is used by this
25038 function. If we can find one, then we know that it will have
25039 been pushed at the start of the prologue and so we can corrupt
25040 it now. */
25041 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25042 if (live_regs_mask & (1 << regno))
25043 break;
25044
25045 gcc_assert (regno <= LAST_LO_REGNUM);
25046
25047 reg = gen_rtx_REG (SImode, regno);
25048
25049 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25050
25051 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25052 stack_pointer_rtx, reg));
25053
25054 dwarf = gen_rtx_SET (stack_pointer_rtx,
25055 plus_constant (Pmode, stack_pointer_rtx,
25056 -amount));
25057 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25058 RTX_FRAME_RELATED_P (insn) = 1;
25059 }
25060 }
25061
25062 if (frame_pointer_needed)
25063 thumb_set_frame_pointer (offsets);
25064
25065 /* If we are profiling, make sure no instructions are scheduled before
25066 the call to mcount. Similarly if the user has requested no
25067 scheduling in the prolog. Similarly if we want non-call exceptions
25068 using the EABI unwinder, to prevent faulting instructions from being
25069 swapped with a stack adjustment. */
25070 if (crtl->profile || !TARGET_SCHED_PROLOG
25071 || (arm_except_unwind_info (&global_options) == UI_TARGET
25072 && cfun->can_throw_non_call_exceptions))
25073 emit_insn (gen_blockage ());
25074
25075 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25076 if (live_regs_mask & 0xff)
25077 cfun->machine->lr_save_eliminated = 0;
25078 }
25079
25080 /* Clear caller saved registers not used to pass return values and leaked
25081 condition flags before exiting a cmse_nonsecure_entry function. */
25082
25083 void
25084 cmse_nonsecure_entry_clear_before_return (void)
25085 {
25086 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25087 uint32_t padding_bits_to_clear = 0;
25088 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25089 auto_sbitmap to_clear_bitmap (maxregno + 1);
25090 tree result_type;
25091 rtx result_rtl;
25092
25093 bitmap_clear (to_clear_bitmap);
25094 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25095 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25096
25097 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25098 registers. */
25099 if (TARGET_HARD_FLOAT)
25100 {
25101 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25102
25103 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25104
25105 /* Make sure we don't clear the two scratch registers used to clear the
25106 relevant FPSCR bits in output_return_instruction. */
25107 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25108 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25109 emit_use (gen_rtx_REG (SImode, 4));
25110 bitmap_clear_bit (to_clear_bitmap, 4);
25111 }
25112
25113 /* If the user has defined registers to be caller saved, these are no longer
25114 restored by the function before returning and must thus be cleared for
25115 security purposes. */
25116 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25117 {
25118 /* We do not touch registers that can be used to pass arguments as per
25119 the AAPCS, since these should never be made callee-saved by user
25120 options. */
25121 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25122 continue;
25123 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25124 continue;
25125 if (call_used_regs[regno])
25126 bitmap_set_bit (to_clear_bitmap, regno);
25127 }
25128
25129 /* Make sure we do not clear the registers used to return the result in. */
25130 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25131 if (!VOID_TYPE_P (result_type))
25132 {
25133 uint64_t to_clear_return_mask;
25134 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25135
25136 /* No need to check that we return in registers, because we don't
25137 support returning on stack yet. */
25138 gcc_assert (REG_P (result_rtl));
25139 to_clear_return_mask
25140 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25141 padding_bits_to_clear_ptr);
25142 if (to_clear_return_mask)
25143 {
25144 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25145 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25146 {
25147 if (to_clear_return_mask & (1ULL << regno))
25148 bitmap_clear_bit (to_clear_bitmap, regno);
25149 }
25150 }
25151 }
25152
25153 if (padding_bits_to_clear != 0)
25154 {
25155 rtx reg_rtx;
25156 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25157
25158 /* Padding bits to clear are nonzero, so we know we are returning a
25159 composite type, which only uses r0. Make sure that r1-r3 are
25160 cleared too; we will use r1 as a scratch register. */
25161 bitmap_clear (to_clear_arg_regs_bitmap);
25162 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25163 NUM_ARG_REGS - 1);
25164 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25165
25166 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25167
25168 /* Fill the lower half of the negated padding_bits_to_clear. */
25169 emit_move_insn (reg_rtx,
25170 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25171
25172 /* Also fill the top half of the negated padding_bits_to_clear. */
25173 if (((~padding_bits_to_clear) >> 16) > 0)
25174 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25175 GEN_INT (16),
25176 GEN_INT (16)),
25177 GEN_INT ((~padding_bits_to_clear) >> 16)));
25178
25179 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25180 gen_rtx_REG (SImode, R0_REGNUM),
25181 reg_rtx));
25182 }
25183
25184 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25185 {
25186 if (!bitmap_bit_p (to_clear_bitmap, regno))
25187 continue;
25188
25189 if (IS_VFP_REGNUM (regno))
25190 {
25191 /* If regno is an even vfp register and its successor is also to
25192 be cleared, use vmov. */
25193 if (TARGET_VFP_DOUBLE
25194 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25195 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25196 {
25197 emit_move_insn (gen_rtx_REG (DFmode, regno),
25198 CONST1_RTX (DFmode));
25199 emit_use (gen_rtx_REG (DFmode, regno));
25200 regno++;
25201 }
25202 else
25203 {
25204 emit_move_insn (gen_rtx_REG (SFmode, regno),
25205 CONST1_RTX (SFmode));
25206 emit_use (gen_rtx_REG (SFmode, regno));
25207 }
25208 }
25209 else
25210 {
25211 if (TARGET_THUMB1)
25212 {
25213 if (regno == R0_REGNUM)
25214 emit_move_insn (gen_rtx_REG (SImode, regno),
25215 const0_rtx);
25216 else
25217 /* R0 has either been cleared above or it holds a return
25218 value; either way it does not contain secret
25219 information. */
25220 emit_move_insn (gen_rtx_REG (SImode, regno),
25221 gen_rtx_REG (SImode, R0_REGNUM));
25222 emit_use (gen_rtx_REG (SImode, regno));
25223 }
25224 else
25225 {
25226 emit_move_insn (gen_rtx_REG (SImode, regno),
25227 gen_rtx_REG (SImode, LR_REGNUM));
25228 emit_use (gen_rtx_REG (SImode, regno));
25229 }
25230 }
25231 }
25232 }
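
/* For illustration only (not literal compiler output): for a soft-float
   cmse_nonsecure_entry function returning an int, r0 carries the return
   value and is left alone, while the remaining caller-saved core
   registers are overwritten with non-secret values, roughly:

	mov	r1, lr
	mov	r2, lr
	mov	r3, lr
	mov	ip, lr

   On Thumb-1 targets r0 (which is not secret) is used as the source
   instead of lr.  */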
25233
25234 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25235 POP instruction can be generated. LR should be replaced by PC. All
25236 the checks required are already done by USE_RETURN_INSN (). Hence,
25237 all we really need to check here is whether a single register or
25238 multiple registers are to be popped. */
25239 void
25240 thumb2_expand_return (bool simple_return)
25241 {
25242 int i, num_regs;
25243 unsigned long saved_regs_mask;
25244 arm_stack_offsets *offsets;
25245
25246 offsets = arm_get_frame_offsets ();
25247 saved_regs_mask = offsets->saved_regs_mask;
25248
25249 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25250 if (saved_regs_mask & (1 << i))
25251 num_regs++;
25252
25253 if (!simple_return && saved_regs_mask)
25254 {
25255 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25256 functions, or adapt the code to handle it according to the ACLE. This
25257 path should not be reachable for cmse_nonsecure_entry functions, but we
25258 assert it for now to ensure that future code changes do not silently
25259 change this behavior. */
25260 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25261 if (num_regs == 1)
25262 {
25263 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25264 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25265 rtx addr = gen_rtx_MEM (SImode,
25266 gen_rtx_POST_INC (SImode,
25267 stack_pointer_rtx));
25268 set_mem_alias_set (addr, get_frame_alias_set ());
25269 XVECEXP (par, 0, 0) = ret_rtx;
25270 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25271 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25272 emit_jump_insn (par);
25273 }
25274 else
25275 {
25276 saved_regs_mask &= ~ (1 << LR_REGNUM);
25277 saved_regs_mask |= (1 << PC_REGNUM);
25278 arm_emit_multi_reg_pop (saved_regs_mask);
25279 }
25280 }
25281 else
25282 {
25283 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25284 cmse_nonsecure_entry_clear_before_return ();
25285 emit_jump_insn (simple_return_rtx);
25286 }
25287 }
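
/* Rough illustration of the two non-simple return shapes produced above
   (exact output depends on the saved register set):

	ldr	pc, [sp], #4		@ only LR was saved
	pop	{r4, r5, pc}		@ several registers saved  */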
25288
25289 void
25290 thumb1_expand_epilogue (void)
25291 {
25292 HOST_WIDE_INT amount;
25293 arm_stack_offsets *offsets;
25294 int regno;
25295
25296 /* Naked functions don't have prologues. */
25297 if (IS_NAKED (arm_current_func_type ()))
25298 return;
25299
25300 offsets = arm_get_frame_offsets ();
25301 amount = offsets->outgoing_args - offsets->saved_regs;
25302
25303 if (frame_pointer_needed)
25304 {
25305 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25306 amount = offsets->locals_base - offsets->saved_regs;
25307 }
25308 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25309
25310 gcc_assert (amount >= 0);
25311 if (amount)
25312 {
25313 emit_insn (gen_blockage ());
25314
25315 if (amount < 512)
25316 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25317 GEN_INT (amount)));
25318 else
25319 {
25320 /* r3 is always free in the epilogue. */
25321 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25322
25323 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25324 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25325 }
25326 }
25327
25328 /* Emit a USE (stack_pointer_rtx), so that
25329 the stack adjustment will not be deleted. */
25330 emit_insn (gen_force_register_use (stack_pointer_rtx));
25331
25332 if (crtl->profile || !TARGET_SCHED_PROLOG)
25333 emit_insn (gen_blockage ());
25334
25335 /* Emit a clobber for each insn that will be restored in the epilogue,
25336 so that flow2 will get register lifetimes correct. */
25337 for (regno = 0; regno < 13; regno++)
25338 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25339 emit_clobber (gen_rtx_REG (SImode, regno));
25340
25341 if (! df_regs_ever_live_p (LR_REGNUM))
25342 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25343
25344 /* Clear all caller-saved regs that are not used to return. */
25345 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25346 cmse_nonsecure_entry_clear_before_return ();
25347 }
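
/* As a sketch, for a local frame of 512 bytes or more the adjustment
   emitted above looks roughly like this (the saved registers are popped
   by separately emitted code):

	ldr	r3, .Lframe_size
	add	sp, sp, r3

   Smaller frames use an immediate "add sp, sp, #N" instead.  */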
25348
25349 /* Epilogue code for APCS frame. */
25350 static void
25351 arm_expand_epilogue_apcs_frame (bool really_return)
25352 {
25353 unsigned long func_type;
25354 unsigned long saved_regs_mask;
25355 int num_regs = 0;
25356 int i;
25357 int floats_from_frame = 0;
25358 arm_stack_offsets *offsets;
25359
25360 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25361 func_type = arm_current_func_type ();
25362
25363 /* Get frame offsets for ARM. */
25364 offsets = arm_get_frame_offsets ();
25365 saved_regs_mask = offsets->saved_regs_mask;
25366
25367 /* Find the offset of the floating-point save area in the frame. */
25368 floats_from_frame
25369 = (offsets->saved_args
25370 + arm_compute_static_chain_stack_bytes ()
25371 - offsets->frame);
25372
25373 /* Compute how many core registers are saved and how far away the floats are. */
25374 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25375 if (saved_regs_mask & (1 << i))
25376 {
25377 num_regs++;
25378 floats_from_frame += 4;
25379 }
25380
25381 if (TARGET_HARD_FLOAT)
25382 {
25383 int start_reg;
25384 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25385
25386 /* The offset is from IP_REGNUM. */
25387 int saved_size = arm_get_vfp_saved_size ();
25388 if (saved_size > 0)
25389 {
25390 rtx_insn *insn;
25391 floats_from_frame += saved_size;
25392 insn = emit_insn (gen_addsi3 (ip_rtx,
25393 hard_frame_pointer_rtx,
25394 GEN_INT (-floats_from_frame)));
25395 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25396 ip_rtx, hard_frame_pointer_rtx);
25397 }
25398
25399 /* Generate VFP register multi-pop. */
25400 start_reg = FIRST_VFP_REGNUM;
25401
25402 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25403 /* Look for a case where a reg does not need restoring. */
25404 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25405 && (!df_regs_ever_live_p (i + 1)
25406 || call_used_regs[i + 1]))
25407 {
25408 if (start_reg != i)
25409 arm_emit_vfp_multi_reg_pop (start_reg,
25410 (i - start_reg) / 2,
25411 gen_rtx_REG (SImode,
25412 IP_REGNUM));
25413 start_reg = i + 2;
25414 }
25415
25416 /* Restore the remaining regs that we have discovered (or possibly
25417 even all of them, if the conditional in the for loop never
25418 fired). */
25419 if (start_reg != i)
25420 arm_emit_vfp_multi_reg_pop (start_reg,
25421 (i - start_reg) / 2,
25422 gen_rtx_REG (SImode, IP_REGNUM));
25423 }
25424
25425 if (TARGET_IWMMXT)
25426 {
25427 /* The frame pointer is guaranteed to be non-double-word aligned, as
25428 it is set to double-word-aligned old_stack_pointer - 4. */
25429 rtx_insn *insn;
25430 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25431
25432 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25433 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25434 {
25435 rtx addr = gen_frame_mem (V2SImode,
25436 plus_constant (Pmode, hard_frame_pointer_rtx,
25437 - lrm_count * 4));
25438 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25439 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25440 gen_rtx_REG (V2SImode, i),
25441 NULL_RTX);
25442 lrm_count += 2;
25443 }
25444 }
25445
25446 /* saved_regs_mask should contain IP, which holds the old stack pointer
25447 from the time the activation record was created. Since SP and IP are
25448 adjacent registers, we can restore the value directly into SP. */
25449 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25450 saved_regs_mask &= ~(1 << IP_REGNUM);
25451 saved_regs_mask |= (1 << SP_REGNUM);
25452
25453 /* There are two registers left in saved_regs_mask - LR and PC. We
25454 only need to restore LR (the return address), but to
25455 save time we can load it directly into PC, unless we need a
25456 special function exit sequence, or we are not really returning. */
25457 if (really_return
25458 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25459 && !crtl->calls_eh_return)
25460 /* Delete LR from the register mask, so that LR on
25461 the stack is loaded into the PC in the register mask. */
25462 saved_regs_mask &= ~(1 << LR_REGNUM);
25463 else
25464 saved_regs_mask &= ~(1 << PC_REGNUM);
25465
25466 num_regs = bit_count (saved_regs_mask);
25467 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25468 {
25469 rtx_insn *insn;
25470 emit_insn (gen_blockage ());
25471 /* Unwind the stack to just below the saved registers. */
25472 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25473 hard_frame_pointer_rtx,
25474 GEN_INT (- 4 * num_regs)));
25475
25476 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25477 stack_pointer_rtx, hard_frame_pointer_rtx);
25478 }
25479
25480 arm_emit_multi_reg_pop (saved_regs_mask);
25481
25482 if (IS_INTERRUPT (func_type))
25483 {
25484 /* Interrupt handlers will have pushed the
25485 IP onto the stack, so restore it now. */
25486 rtx_insn *insn;
25487 rtx addr = gen_rtx_MEM (SImode,
25488 gen_rtx_POST_INC (SImode,
25489 stack_pointer_rtx));
25490 set_mem_alias_set (addr, get_frame_alias_set ());
25491 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25492 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25493 gen_rtx_REG (SImode, IP_REGNUM),
25494 NULL_RTX);
25495 }
25496
25497 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25498 return;
25499
25500 if (crtl->calls_eh_return)
25501 emit_insn (gen_addsi3 (stack_pointer_rtx,
25502 stack_pointer_rtx,
25503 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25504
25505 if (IS_STACKALIGN (func_type))
25506 /* Restore the original stack pointer. Before prologue, the stack was
25507 realigned and the original stack pointer saved in r0. For details,
25508 see comment in arm_expand_prologue. */
25509 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25510
25511 emit_jump_insn (simple_return_rtx);
25512 }
25513
25514 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25515 function is not a sibcall. */
25516 void
25517 arm_expand_epilogue (bool really_return)
25518 {
25519 unsigned long func_type;
25520 unsigned long saved_regs_mask;
25521 int num_regs = 0;
25522 int i;
25523 int amount;
25524 arm_stack_offsets *offsets;
25525
25526 func_type = arm_current_func_type ();
25527
25528 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25529 let output_return_instruction take care of any instruction emission. */
25530 if (IS_NAKED (func_type)
25531 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25532 {
25533 if (really_return)
25534 emit_jump_insn (simple_return_rtx);
25535 return;
25536 }
25537
25538 /* If we are throwing an exception, then we really must be doing a
25539 return, so we can't tail-call. */
25540 gcc_assert (!crtl->calls_eh_return || really_return);
25541
25542 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25543 {
25544 arm_expand_epilogue_apcs_frame (really_return);
25545 return;
25546 }
25547
25548 /* Get frame offsets for ARM. */
25549 offsets = arm_get_frame_offsets ();
25550 saved_regs_mask = offsets->saved_regs_mask;
25551 num_regs = bit_count (saved_regs_mask);
25552
25553 if (frame_pointer_needed)
25554 {
25555 rtx_insn *insn;
25556 /* Restore stack pointer if necessary. */
25557 if (TARGET_ARM)
25558 {
25559 /* In ARM mode, frame pointer points to first saved register.
25560 Restore stack pointer to last saved register. */
25561 amount = offsets->frame - offsets->saved_regs;
25562
25563 /* Force out any pending memory operations that reference stacked data
25564 before stack de-allocation occurs. */
25565 emit_insn (gen_blockage ());
25566 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25567 hard_frame_pointer_rtx,
25568 GEN_INT (amount)));
25569 arm_add_cfa_adjust_cfa_note (insn, amount,
25570 stack_pointer_rtx,
25571 hard_frame_pointer_rtx);
25572
25573 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25574 deleted. */
25575 emit_insn (gen_force_register_use (stack_pointer_rtx));
25576 }
25577 else
25578 {
25579 /* In Thumb-2 mode, the frame pointer points to the last saved
25580 register. */
25581 amount = offsets->locals_base - offsets->saved_regs;
25582 if (amount)
25583 {
25584 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25585 hard_frame_pointer_rtx,
25586 GEN_INT (amount)));
25587 arm_add_cfa_adjust_cfa_note (insn, amount,
25588 hard_frame_pointer_rtx,
25589 hard_frame_pointer_rtx);
25590 }
25591
25592 /* Force out any pending memory operations that reference stacked data
25593 before stack de-allocation occurs. */
25594 emit_insn (gen_blockage ());
25595 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25596 hard_frame_pointer_rtx));
25597 arm_add_cfa_adjust_cfa_note (insn, 0,
25598 stack_pointer_rtx,
25599 hard_frame_pointer_rtx);
25600 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25601 deleted. */
25602 emit_insn (gen_force_register_use (stack_pointer_rtx));
25603 }
25604 }
25605 else
25606 {
25607 /* Pop off outgoing args and local frame to adjust stack pointer to
25608 last saved register. */
25609 amount = offsets->outgoing_args - offsets->saved_regs;
25610 if (amount)
25611 {
25612 rtx_insn *tmp;
25613 /* Force out any pending memory operations that reference stacked data
25614 before stack de-allocation occurs. */
25615 emit_insn (gen_blockage ());
25616 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25617 stack_pointer_rtx,
25618 GEN_INT (amount)));
25619 arm_add_cfa_adjust_cfa_note (tmp, amount,
25620 stack_pointer_rtx, stack_pointer_rtx);
25621 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25622 not deleted. */
25623 emit_insn (gen_force_register_use (stack_pointer_rtx));
25624 }
25625 }
25626
25627 if (TARGET_HARD_FLOAT)
25628 {
25629 /* Generate VFP register multi-pop. */
25630 int end_reg = LAST_VFP_REGNUM + 1;
25631
25632 /* Scan the registers in reverse order. We need to match
25633 any groupings made in the prologue and generate matching
25634 vldm operations. The need to match groups is because,
25635 unlike pop, vldm can only do consecutive regs. */
25636 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25637 /* Look for a case where a reg does not need restoring. */
25638 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25639 && (!df_regs_ever_live_p (i + 1)
25640 || call_used_regs[i + 1]))
25641 {
25642 /* Restore the regs discovered so far (from reg+2 to
25643 end_reg). */
25644 if (end_reg > i + 2)
25645 arm_emit_vfp_multi_reg_pop (i + 2,
25646 (end_reg - (i + 2)) / 2,
25647 stack_pointer_rtx);
25648 end_reg = i;
25649 }
25650
25651 /* Restore the remaining regs that we have discovered (or possibly
25652 even all of them, if the conditional in the for loop never
25653 fired). */
25654 if (end_reg > i + 2)
25655 arm_emit_vfp_multi_reg_pop (i + 2,
25656 (end_reg - (i + 2)) / 2,
25657 stack_pointer_rtx);
25658 }
25659
25660 if (TARGET_IWMMXT)
25661 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25662 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25663 {
25664 rtx_insn *insn;
25665 rtx addr = gen_rtx_MEM (V2SImode,
25666 gen_rtx_POST_INC (SImode,
25667 stack_pointer_rtx));
25668 set_mem_alias_set (addr, get_frame_alias_set ());
25669 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25670 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25671 gen_rtx_REG (V2SImode, i),
25672 NULL_RTX);
25673 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25674 stack_pointer_rtx, stack_pointer_rtx);
25675 }
25676
25677 if (saved_regs_mask)
25678 {
25679 rtx insn;
25680 bool return_in_pc = false;
25681
25682 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25683 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25684 && !IS_CMSE_ENTRY (func_type)
25685 && !IS_STACKALIGN (func_type)
25686 && really_return
25687 && crtl->args.pretend_args_size == 0
25688 && saved_regs_mask & (1 << LR_REGNUM)
25689 && !crtl->calls_eh_return)
25690 {
25691 saved_regs_mask &= ~(1 << LR_REGNUM);
25692 saved_regs_mask |= (1 << PC_REGNUM);
25693 return_in_pc = true;
25694 }
25695
25696 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25697 {
25698 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25699 if (saved_regs_mask & (1 << i))
25700 {
25701 rtx addr = gen_rtx_MEM (SImode,
25702 gen_rtx_POST_INC (SImode,
25703 stack_pointer_rtx));
25704 set_mem_alias_set (addr, get_frame_alias_set ());
25705
25706 if (i == PC_REGNUM)
25707 {
25708 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25709 XVECEXP (insn, 0, 0) = ret_rtx;
25710 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25711 addr);
25712 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25713 insn = emit_jump_insn (insn);
25714 }
25715 else
25716 {
25717 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25718 addr));
25719 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25720 gen_rtx_REG (SImode, i),
25721 NULL_RTX);
25722 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25723 stack_pointer_rtx,
25724 stack_pointer_rtx);
25725 }
25726 }
25727 }
25728 else
25729 {
25730 if (TARGET_LDRD
25731 && current_tune->prefer_ldrd_strd
25732 && !optimize_function_for_size_p (cfun))
25733 {
25734 if (TARGET_THUMB2)
25735 thumb2_emit_ldrd_pop (saved_regs_mask);
25736 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25737 arm_emit_ldrd_pop (saved_regs_mask);
25738 else
25739 arm_emit_multi_reg_pop (saved_regs_mask);
25740 }
25741 else
25742 arm_emit_multi_reg_pop (saved_regs_mask);
25743 }
25744
25745 if (return_in_pc)
25746 return;
25747 }
25748
25749 amount
25750 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25751 if (amount)
25752 {
25753 int i, j;
25754 rtx dwarf = NULL_RTX;
25755 rtx_insn *tmp =
25756 emit_insn (gen_addsi3 (stack_pointer_rtx,
25757 stack_pointer_rtx,
25758 GEN_INT (amount)));
25759
25760 RTX_FRAME_RELATED_P (tmp) = 1;
25761
25762 if (cfun->machine->uses_anonymous_args)
25763 {
25764 /* Restore pretend args. See arm_expand_prologue for how the
25765 pretend args are saved on the stack. */
25766 int num_regs = crtl->args.pretend_args_size / 4;
25767 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25768 for (j = 0, i = 0; j < num_regs; i++)
25769 if (saved_regs_mask & (1 << i))
25770 {
25771 rtx reg = gen_rtx_REG (SImode, i);
25772 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25773 j++;
25774 }
25775 REG_NOTES (tmp) = dwarf;
25776 }
25777 arm_add_cfa_adjust_cfa_note (tmp, amount,
25778 stack_pointer_rtx, stack_pointer_rtx);
25779 }
25780
25781 /* Clear all caller-saved regs that are not used to return. */
25782 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25783 {
25784 /* CMSE_ENTRY always returns. */
25785 gcc_assert (really_return);
25786 cmse_nonsecure_entry_clear_before_return ();
25787 }
25788
25789 if (!really_return)
25790 return;
25791
25792 if (crtl->calls_eh_return)
25793 emit_insn (gen_addsi3 (stack_pointer_rtx,
25794 stack_pointer_rtx,
25795 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25796
25797 if (IS_STACKALIGN (func_type))
25798 /* Restore the original stack pointer. Before prologue, the stack was
25799 realigned and the original stack pointer saved in r0. For details,
25800 see comment in arm_expand_prologue. */
25801 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25802
25803 emit_jump_insn (simple_return_rtx);
25804 }
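
/* Typical shape of the code produced above for a simple Thumb-2 function
   with 16 bytes of locals and r4-r7/lr saved (illustrative only):

	add	sp, sp, #16
	pop	{r4, r5, r6, r7, pc}  */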
25805
25806 /* Implementation of insn prologue_thumb1_interwork. This is the first
25807 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25808
25809 const char *
25810 thumb1_output_interwork (void)
25811 {
25812 const char * name;
25813 FILE *f = asm_out_file;
25814
25815 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25816 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25817 == SYMBOL_REF);
25818 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25819
25820 /* Generate code sequence to switch us into Thumb mode. */
25821 /* The .code 32 directive has already been emitted by
25822 ASM_DECLARE_FUNCTION_NAME. */
25823 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25824 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25825
25826 /* Generate a label, so that the debugger will notice the
25827 change in instruction sets. This label is also used by
25828 the assembler to bypass the ARM code when this function
25829 is called from a Thumb encoded function elsewhere in the
25830 same file. Hence the definition of STUB_NAME here must
25831 agree with the definition in gas/config/tc-arm.c. */
25832
25833 #define STUB_NAME ".real_start_of"
25834
25835 fprintf (f, "\t.code\t16\n");
25836 #ifdef ARM_PE
25837 if (arm_dllexport_name_p (name))
25838 name = arm_strip_name_encoding (name);
25839 #endif
25840 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25841 fprintf (f, "\t.thumb_func\n");
25842 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25843
25844 return "";
25845 }
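
/* The sequence emitted above looks roughly like this for a function
   <name> (modulo the user label prefix):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:  */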
25846
25847 /* Handle the case of a double word load into a low register from
25848 a computed memory address. The computed address may involve a
25849 register which is overwritten by the load. */
25850 const char *
25851 thumb_load_double_from_address (rtx *operands)
25852 {
25853 rtx addr;
25854 rtx base;
25855 rtx offset;
25856 rtx arg1;
25857 rtx arg2;
25858
25859 gcc_assert (REG_P (operands[0]));
25860 gcc_assert (MEM_P (operands[1]));
25861
25862 /* Get the memory address. */
25863 addr = XEXP (operands[1], 0);
25864
25865 /* Work out how the memory address is computed. */
25866 switch (GET_CODE (addr))
25867 {
25868 case REG:
25869 operands[2] = adjust_address (operands[1], SImode, 4);
25870
25871 if (REGNO (operands[0]) == REGNO (addr))
25872 {
25873 output_asm_insn ("ldr\t%H0, %2", operands);
25874 output_asm_insn ("ldr\t%0, %1", operands);
25875 }
25876 else
25877 {
25878 output_asm_insn ("ldr\t%0, %1", operands);
25879 output_asm_insn ("ldr\t%H0, %2", operands);
25880 }
25881 break;
25882
25883 case CONST:
25884 /* Compute <address> + 4 for the high order load. */
25885 operands[2] = adjust_address (operands[1], SImode, 4);
25886
25887 output_asm_insn ("ldr\t%0, %1", operands);
25888 output_asm_insn ("ldr\t%H0, %2", operands);
25889 break;
25890
25891 case PLUS:
25892 arg1 = XEXP (addr, 0);
25893 arg2 = XEXP (addr, 1);
25894
25895 if (CONSTANT_P (arg1))
25896 base = arg2, offset = arg1;
25897 else
25898 base = arg1, offset = arg2;
25899
25900 gcc_assert (REG_P (base));
25901
25902 /* Catch the case of <address> = <reg> + <reg> */
25903 if (REG_P (offset))
25904 {
25905 int reg_offset = REGNO (offset);
25906 int reg_base = REGNO (base);
25907 int reg_dest = REGNO (operands[0]);
25908
25909 /* Add the base and offset registers together into the
25910 higher destination register. */
25911 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25912 reg_dest + 1, reg_base, reg_offset);
25913
25914 /* Load the lower destination register from the address in
25915 the higher destination register. */
25916 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25917 reg_dest, reg_dest + 1);
25918
25919 /* Load the higher destination register from its own address
25920 plus 4. */
25921 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25922 reg_dest + 1, reg_dest + 1);
25923 }
25924 else
25925 {
25926 /* Compute <address> + 4 for the high order load. */
25927 operands[2] = adjust_address (operands[1], SImode, 4);
25928
25929 /* If the computed address is held in the low order register
25930 then load the high order register first, otherwise always
25931 load the low order register first. */
25932 if (REGNO (operands[0]) == REGNO (base))
25933 {
25934 output_asm_insn ("ldr\t%H0, %2", operands);
25935 output_asm_insn ("ldr\t%0, %1", operands);
25936 }
25937 else
25938 {
25939 output_asm_insn ("ldr\t%0, %1", operands);
25940 output_asm_insn ("ldr\t%H0, %2", operands);
25941 }
25942 }
25943 break;
25944
25945 case LABEL_REF:
25946 /* With no registers to worry about we can just load the value
25947 directly. */
25948 operands[2] = adjust_address (operands[1], SImode, 4);
25949
25950 output_asm_insn ("ldr\t%H0, %2", operands);
25951 output_asm_insn ("ldr\t%0, %1", operands);
25952 break;
25953
25954 default:
25955 gcc_unreachable ();
25956 }
25957
25958 return "";
25959 }
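
/* Example of the register + register case above, with a hypothetical
   destination pair r2/r3, base r4 and offset r5:

	add	r3, r4, r5
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]  */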
25960
25961 const char *
25962 thumb_output_move_mem_multiple (int n, rtx *operands)
25963 {
25964 switch (n)
25965 {
25966 case 2:
25967 if (REGNO (operands[4]) > REGNO (operands[5]))
25968 std::swap (operands[4], operands[5]);
25969
25970 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25971 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25972 break;
25973
25974 case 3:
25975 if (REGNO (operands[4]) > REGNO (operands[5]))
25976 std::swap (operands[4], operands[5]);
25977 if (REGNO (operands[5]) > REGNO (operands[6]))
25978 std::swap (operands[5], operands[6]);
25979 if (REGNO (operands[4]) > REGNO (operands[5]))
25980 std::swap (operands[4], operands[5]);
25981
25982 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25983 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25984 break;
25985
25986 default:
25987 gcc_unreachable ();
25988 }
25989
25990 return "";
25991 }
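
/* Example for n == 3 with hypothetical pointer registers r0/r1 and
   scratch registers r3, r4, r5:

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}  */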
25992
25993 /* Output a call-via instruction for thumb state. */
25994 const char *
25995 thumb_call_via_reg (rtx reg)
25996 {
25997 int regno = REGNO (reg);
25998 rtx *labelp;
25999
26000 gcc_assert (regno < LR_REGNUM);
26001
26002 /* If we are in the normal text section we can use a single instance
26003 per compilation unit. If we are doing function sections, then we need
26004 an entry per section, since we can't rely on reachability. */
26005 if (in_section == text_section)
26006 {
26007 thumb_call_reg_needed = 1;
26008
26009 if (thumb_call_via_label[regno] == NULL)
26010 thumb_call_via_label[regno] = gen_label_rtx ();
26011 labelp = thumb_call_via_label + regno;
26012 }
26013 else
26014 {
26015 if (cfun->machine->call_via[regno] == NULL)
26016 cfun->machine->call_via[regno] = gen_label_rtx ();
26017 labelp = cfun->machine->call_via + regno;
26018 }
26019
26020 output_asm_insn ("bl\t%a0", labelp);
26021 return "";
26022 }
26023
26024 /* Routines for generating rtl. */
26025 void
26026 thumb_expand_movmemqi (rtx *operands)
26027 {
26028 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26029 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26030 HOST_WIDE_INT len = INTVAL (operands[2]);
26031 HOST_WIDE_INT offset = 0;
26032
26033 while (len >= 12)
26034 {
26035 emit_insn (gen_movmem12b (out, in, out, in));
26036 len -= 12;
26037 }
26038
26039 if (len >= 8)
26040 {
26041 emit_insn (gen_movmem8b (out, in, out, in));
26042 len -= 8;
26043 }
26044
26045 if (len >= 4)
26046 {
26047 rtx reg = gen_reg_rtx (SImode);
26048 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26049 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26050 len -= 4;
26051 offset += 4;
26052 }
26053
26054 if (len >= 2)
26055 {
26056 rtx reg = gen_reg_rtx (HImode);
26057 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26058 plus_constant (Pmode, in,
26059 offset))));
26060 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26061 offset)),
26062 reg));
26063 len -= 2;
26064 offset += 2;
26065 }
26066
26067 if (len)
26068 {
26069 rtx reg = gen_reg_rtx (QImode);
26070 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26071 plus_constant (Pmode, in,
26072 offset))));
26073 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26074 offset)),
26075 reg));
26076 }
26077 }
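
/* Worked example: a copy of 23 bytes is decomposed above into one
   12-byte block move, one 8-byte block move, a halfword copy and a
   final byte copy (12 + 8 + 2 + 1 = 23).  */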
26078
26079 void
26080 thumb_reload_out_hi (rtx *operands)
26081 {
26082 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26083 }
26084
26085 /* Return the length of a function name prefix
26086 that starts with the character 'c'. */
26087 static int
26088 arm_get_strip_length (int c)
26089 {
26090 switch (c)
26091 {
26092 ARM_NAME_ENCODING_LENGTHS
26093 default: return 0;
26094 }
26095 }
26096
26097 /* Return a pointer to a function's name with any
26098 and all prefix encodings stripped from it. */
26099 const char *
26100 arm_strip_name_encoding (const char *name)
26101 {
26102 int skip;
26103
26104 while ((skip = arm_get_strip_length (* name)))
26105 name += skip;
26106
26107 return name;
26108 }
26109
26110 /* If there is a '*' anywhere in the name's prefix, then
26111 emit the stripped name verbatim, otherwise prepend an
26112 underscore if leading underscores are being used. */
26113 void
26114 arm_asm_output_labelref (FILE *stream, const char *name)
26115 {
26116 int skip;
26117 int verbatim = 0;
26118
26119 while ((skip = arm_get_strip_length (* name)))
26120 {
26121 verbatim |= (*name == '*');
26122 name += skip;
26123 }
26124
26125 if (verbatim)
26126 fputs (name, stream);
26127 else
26128 asm_fprintf (stream, "%U%s", name);
26129 }
26130
26131 /* This function is used to emit an EABI tag and its associated value.
26132 We emit the numerical value of the tag in case the assembler does not
26133 support textual tags (e.g. gas prior to 2.20). If requested we include
26134 the tag name in a comment so that anyone reading the assembler output
26135 will know which tag is being set.
26136
26137 This function is not static because arm-c.c needs it too. */
26138
26139 void
26140 arm_emit_eabi_attribute (const char *name, int num, int val)
26141 {
26142 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26143 if (flag_verbose_asm || flag_debug_asm)
26144 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26145 asm_fprintf (asm_out_file, "\n");
26146 }
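
/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   produces (the trailing comment only with -fverbose-asm or -dA):

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal  */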
26147
26148 /* This function is used to print CPU tuning information as comment
26149 in assembler file. Pointers are not printed for now. */
26150
26151 void
26152 arm_print_tune_info (void)
26153 {
26154 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26155 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26156 current_tune->constant_limit);
26157 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26158 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26159 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26160 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26161 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26162 "prefetch.l1_cache_size:\t%d\n",
26163 current_tune->prefetch.l1_cache_size);
26164 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26165 "prefetch.l1_cache_line_size:\t%d\n",
26166 current_tune->prefetch.l1_cache_line_size);
26167 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26168 "prefer_constant_pool:\t%d\n",
26169 (int) current_tune->prefer_constant_pool);
26170 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26171 "branch_cost:\t(s:speed, p:predictable)\n");
26172 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26173 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26174 current_tune->branch_cost (false, false));
26175 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26176 current_tune->branch_cost (false, true));
26177 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26178 current_tune->branch_cost (true, false));
26179 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26180 current_tune->branch_cost (true, true));
26181 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26182 "prefer_ldrd_strd:\t%d\n",
26183 (int) current_tune->prefer_ldrd_strd);
26184 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26185 "logical_op_non_short_circuit:\t[%d,%d]\n",
26186 (int) current_tune->logical_op_non_short_circuit_thumb,
26187 (int) current_tune->logical_op_non_short_circuit_arm);
26188 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26189 "prefer_neon_for_64bits:\t%d\n",
26190 (int) current_tune->prefer_neon_for_64bits);
26191 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26192 "disparage_flag_setting_t16_encodings:\t%d\n",
26193 (int) current_tune->disparage_flag_setting_t16_encodings);
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26195 "string_ops_prefer_neon:\t%d\n",
26196 (int) current_tune->string_ops_prefer_neon);
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26198 "max_insns_inline_memset:\t%d\n",
26199 current_tune->max_insns_inline_memset);
26200 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26201 current_tune->fusible_ops);
26202 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26203 (int) current_tune->sched_autopref);
26204 }
26205
26206 /* Print .arch and .arch_extension directives corresponding to the
26207 current architecture configuration. */
26208 static void
26209 arm_print_asm_arch_directives ()
26210 {
26211 const arch_option *arch
26212 = arm_parse_arch_option_name (all_architectures, "-march",
26213 arm_active_target.arch_name);
26214 auto_sbitmap opt_bits (isa_num_bits);
26215
26216 gcc_assert (arch);
26217
26218 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26219 if (!arch->common.extensions)
26220 return;
26221
26222 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26223 opt->name != NULL;
26224 opt++)
26225 {
26226 if (!opt->remove)
26227 {
26228 arm_initialize_isa (opt_bits, opt->isa_bits);
26229
26230 /* If every feature bit of this option is set in the target
26231 ISA specification, print out the option name. However,
26232 don't print anything if all the bits are part of the
26233 FPU specification. */
26234 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26235 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26236 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26237 }
26238 }
26239 }
26240
26241 static void
26242 arm_file_start (void)
26243 {
26244 int val;
26245
26246 if (TARGET_BPABI)
26247 {
26248 /* We don't have a specified CPU. Use the architecture to
26249 generate the tags.
26250
26251 Note: it might be better to do this unconditionally, then the
26252 assembler would not need to know about all new CPU names as
26253 they are added. */
26254 if (!arm_active_target.core_name)
26255 {
26256 /* armv7ve doesn't support any extensions. */
26257 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26258 {
26259 /* Keep backward compatibility for assemblers
26260 which don't support armv7ve. */
26261 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26262 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26263 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26264 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26265 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26266 }
26267 else
26268 arm_print_asm_arch_directives ();
26269 }
26270 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26271 asm_fprintf (asm_out_file, "\t.arch %s\n",
26272 arm_active_target.core_name + 8);
26273 else
26274 {
26275 const char* truncated_name
26276 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26277 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26278 }
26279
26280 if (print_tune_info)
26281 arm_print_tune_info ();
26282
26283 if (! TARGET_SOFT_FLOAT)
26284 {
26285 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26286 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26287
26288 if (TARGET_HARD_FLOAT_ABI)
26289 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26290 }
26291
26292 /* Some of these attributes only apply when the corresponding features
26293 are used. However we don't have any easy way of figuring this out.
26294 Conservatively record the setting that would have been used. */
26295
26296 if (flag_rounding_math)
26297 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26298
26299 if (!flag_unsafe_math_optimizations)
26300 {
26301 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26302 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26303 }
26304 if (flag_signaling_nans)
26305 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26306
26307 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26308 flag_finite_math_only ? 1 : 3);
26309
26310 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26311 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26312 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26313 flag_short_enums ? 1 : 2);
26314
26315 /* Tag_ABI_optimization_goals. */
26316 if (optimize_size)
26317 val = 4;
26318 else if (optimize >= 2)
26319 val = 2;
26320 else if (optimize)
26321 val = 1;
26322 else
26323 val = 6;
26324 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26325
26326 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26327 unaligned_access);
26328
26329 if (arm_fp16_format)
26330 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26331 (int) arm_fp16_format);
26332
26333 if (arm_lang_output_object_attributes_hook)
26334 arm_lang_output_object_attributes_hook();
26335 }
26336
26337 default_file_start ();
26338 }
26339
26340 static void
26341 arm_file_end (void)
26342 {
26343 int regno;
26344
26345 if (NEED_INDICATE_EXEC_STACK)
26346 /* Add .note.GNU-stack. */
26347 file_end_indicate_exec_stack ();
26348
26349 if (! thumb_call_reg_needed)
26350 return;
26351
26352 switch_to_section (text_section);
26353 asm_fprintf (asm_out_file, "\t.code 16\n");
26354 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26355
26356 for (regno = 0; regno < LR_REGNUM; regno++)
26357 {
26358 rtx label = thumb_call_via_label[regno];
26359
26360 if (label != 0)
26361 {
26362 targetm.asm_out.internal_label (asm_out_file, "L",
26363 CODE_LABEL_NUMBER (label));
26364 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26365 }
26366 }
26367 }
26368
26369 #ifndef ARM_PE
26370 /* Symbols in the text segment can be accessed without indirecting via the
26371 constant pool; it may take an extra binary operation, but this is still
26372 faster than indirecting via memory. Don't do this when not optimizing,
26373 since we won't be calculating all of the offsets necessary to do this
26374 simplification. */
26375
26376 static void
26377 arm_encode_section_info (tree decl, rtx rtl, int first)
26378 {
26379 if (optimize > 0 && TREE_CONSTANT (decl))
26380 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26381
26382 default_encode_section_info (decl, rtl, first);
26383 }
26384 #endif /* !ARM_PE */
26385
26386 static void
26387 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26388 {
26389 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26390 && !strcmp (prefix, "L"))
26391 {
26392 arm_ccfsm_state = 0;
26393 arm_target_insn = NULL;
26394 }
26395 default_internal_label (stream, prefix, labelno);
26396 }
26397
26398 /* Output code to add DELTA to the first argument, and then jump
26399 to FUNCTION. Used for C++ multiple inheritance. */
26400
26401 static void
26402 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26403 HOST_WIDE_INT, tree function)
26404 {
26405 static int thunk_label = 0;
26406 char label[256];
26407 char labelpc[256];
26408 int mi_delta = delta;
26409 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26410 int shift = 0;
26411 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26412 ? 1 : 0);
26413 if (mi_delta < 0)
26414 mi_delta = - mi_delta;
26415
26416 final_start_function (emit_barrier (), file, 1);
26417
26418 if (TARGET_THUMB1)
26419 {
26420 int labelno = thunk_label++;
26421 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26422 /* Thunks are entered in arm mode when available. */
26423 if (TARGET_THUMB1_ONLY)
26424 {
26425 /* push r3 so we can use it as a temporary. */
26426 /* TODO: Omit this save if r3 is not used. */
26427 fputs ("\tpush {r3}\n", file);
26428 fputs ("\tldr\tr3, ", file);
26429 }
26430 else
26431 {
26432 fputs ("\tldr\tr12, ", file);
26433 }
26434 assemble_name (file, label);
26435 fputc ('\n', file);
26436 if (flag_pic)
26437 {
26438 /* If we are generating PIC, the ldr instruction below loads
26439 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26440 the address of the add + 8, so we have:
26441
26442 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26443 = target + 1.
26444
26445 Note that we have "+ 1" because some versions of GNU ld
26446 don't set the low bit of the result for R_ARM_REL32
26447 relocations against thumb function symbols.
26448 On ARMv6M this is +4, not +8. */
26449 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26450 assemble_name (file, labelpc);
26451 fputs (":\n", file);
26452 if (TARGET_THUMB1_ONLY)
26453 {
26454 /* This is 2 insns after the start of the thunk, so we know it
26455 is 4-byte aligned. */
26456 fputs ("\tadd\tr3, pc, r3\n", file);
26457 fputs ("\tmov r12, r3\n", file);
26458 }
26459 else
26460 fputs ("\tadd\tr12, pc, r12\n", file);
26461 }
26462 else if (TARGET_THUMB1_ONLY)
26463 fputs ("\tmov r12, r3\n", file);
26464 }
26465 if (TARGET_THUMB1_ONLY)
26466 {
26467 if (mi_delta > 255)
26468 {
26469 fputs ("\tldr\tr3, ", file);
26470 assemble_name (file, label);
26471 fputs ("+4\n", file);
26472 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26473 mi_op, this_regno, this_regno);
26474 }
26475 else if (mi_delta != 0)
26476 {
26477 /* Thumb1 unified syntax requires s suffix in instruction name when
26478 one of the operands is immediate. */
26479 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26480 mi_op, this_regno, this_regno,
26481 mi_delta);
26482 }
26483 }
26484 else
26485 {
26486 /* TODO: Use movw/movt for large constants when available. */
26487 while (mi_delta != 0)
26488 {
26489 if ((mi_delta & (3 << shift)) == 0)
26490 shift += 2;
26491 else
26492 {
26493 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26494 mi_op, this_regno, this_regno,
26495 mi_delta & (0xff << shift));
26496 mi_delta &= ~(0xff << shift);
26497 shift += 8;
26498 }
26499 }
26500 }
26501 if (TARGET_THUMB1)
26502 {
26503 if (TARGET_THUMB1_ONLY)
26504 fputs ("\tpop\t{r3}\n", file);
26505
26506 fprintf (file, "\tbx\tr12\n");
26507 ASM_OUTPUT_ALIGN (file, 2);
26508 assemble_name (file, label);
26509 fputs (":\n", file);
26510 if (flag_pic)
26511 {
26512 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26513 rtx tem = XEXP (DECL_RTL (function), 0);
26514 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26515 pipeline offset is four rather than eight. Adjust the offset
26516 accordingly. */
26517 tem = plus_constant (GET_MODE (tem), tem,
26518 TARGET_THUMB1_ONLY ? -3 : -7);
26519 tem = gen_rtx_MINUS (GET_MODE (tem),
26520 tem,
26521 gen_rtx_SYMBOL_REF (Pmode,
26522 ggc_strdup (labelpc)));
26523 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26524 }
26525 else
26526 /* Output ".word .LTHUNKn". */
26527 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26528
26529 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26530 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26531 }
26532 else
26533 {
26534 fputs ("\tb\t", file);
26535 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26536 if (NEED_PLT_RELOC)
26537 fputs ("(PLT)", file);
26538 fputc ('\n', file);
26539 }
26540
26541 final_end_function ();
26542 }
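
/* Rough shape of a non-PIC Thumb-1 (but not Thumb-1-only) thunk emitted
   above, for a hypothetical delta of 8 with the this pointer in r0:

	ldr	r12, .LTHUMBFUNC0
	add	r0, r0, #8
	bx	r12
	.align	2
   .LTHUMBFUNC0:
	.word	<target function>  */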
26543
26544 /* MI thunk handling for TARGET_32BIT. */
26545
26546 static void
26547 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26548 HOST_WIDE_INT vcall_offset, tree function)
26549 {
26550 /* On ARM, this_regno is R0 or R1 depending on
26551 whether the function returns an aggregate or not.
26552 */
26553 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26554 function)
26555 ? R1_REGNUM : R0_REGNUM);
26556
26557 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26558 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26559 reload_completed = 1;
26560 emit_note (NOTE_INSN_PROLOGUE_END);
26561
26562 /* Add DELTA to THIS_RTX. */
26563 if (delta != 0)
26564 arm_split_constant (PLUS, Pmode, NULL_RTX,
26565 delta, this_rtx, this_rtx, false);
26566
26567 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26568 if (vcall_offset != 0)
26569 {
26570 /* Load *THIS_RTX. */
26571 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26572 /* Compute *THIS_RTX + VCALL_OFFSET. */
26573 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26574 false);
26575 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26576 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26577 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26578 }
26579
26580 /* Generate a tail call to the target function. */
26581 if (!TREE_USED (function))
26582 {
26583 assemble_external (function);
26584 TREE_USED (function) = 1;
26585 }
26586 rtx funexp = XEXP (DECL_RTL (function), 0);
26587 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26588 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26589 SIBLING_CALL_P (insn) = 1;
26590
26591 insn = get_insns ();
26592 shorten_branches (insn);
26593 final_start_function (insn, file, 1);
26594 final (insn, file, 1);
26595 final_end_function ();
26596
26597 /* Stop pretending this is a post-reload pass. */
26598 reload_completed = 0;
26599 }
26600
26601 /* Output code to add DELTA to the first argument, and then jump
26602 to FUNCTION. Used for C++ multiple inheritance. */
26603
26604 static void
26605 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26606 HOST_WIDE_INT vcall_offset, tree function)
26607 {
26608 if (TARGET_32BIT)
26609 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26610 else
26611 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26612 }
26613
26614 int
26615 arm_emit_vector_const (FILE *file, rtx x)
26616 {
26617 int i;
26618 const char * pattern;
26619
26620 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26621
26622 switch (GET_MODE (x))
26623 {
26624 case E_V2SImode: pattern = "%08x"; break;
26625 case E_V4HImode: pattern = "%04x"; break;
26626 case E_V8QImode: pattern = "%02x"; break;
26627 default: gcc_unreachable ();
26628 }
26629
26630 fprintf (file, "0x");
26631 for (i = CONST_VECTOR_NUNITS (x); i--;)
26632 {
26633 rtx element;
26634
26635 element = CONST_VECTOR_ELT (x, i);
26636 fprintf (file, pattern, INTVAL (element));
26637 }
26638
26639 return 1;
26640 }
26641
26642 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26643 HFmode constant pool entries are actually loaded with ldr. */
26644 void
26645 arm_emit_fp16_const (rtx c)
26646 {
26647 long bits;
26648
26649 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26650 if (WORDS_BIG_ENDIAN)
26651 assemble_zeros (2);
26652 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26653 if (!WORDS_BIG_ENDIAN)
26654 assemble_zeros (2);
26655 }
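
/* For example, the HFmode constant 1.0 has the representation 0x3c00,
   so on a little-endian target the code above emits roughly:

	.short	0x3c00
	.space	2  */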
26656
26657 const char *
26658 arm_output_load_gr (rtx *operands)
26659 {
26660 rtx reg;
26661 rtx offset;
26662 rtx wcgr;
26663 rtx sum;
26664
26665 if (!MEM_P (operands [1])
26666 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26667 || !REG_P (reg = XEXP (sum, 0))
26668 || !CONST_INT_P (offset = XEXP (sum, 1))
26669 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26670 return "wldrw%?\t%0, %1";
26671
26672 /* Fix up an out-of-range load of a GR register. */
26673 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26674 wcgr = operands[0];
26675 operands[0] = reg;
26676 output_asm_insn ("ldr%?\t%0, %1", operands);
26677
26678 operands[0] = wcgr;
26679 operands[1] = reg;
26680 output_asm_insn ("tmcr%?\t%0, %1", operands);
26681 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26682
26683 return "";
26684 }
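
/* The out-of-range expansion above emits, with rN the base register of
   the original address and wcgrM the destination GR register:

	str	rN, [sp, #-4]!	@ Start of GR load expansion
	ldr	rN, <original address>
	tmcr	wcgrM, rN
	ldr	rN, [sp], #4	@ End of GR load expansion  */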
26685
26686 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26687
26688 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26689 named arg and all anonymous args onto the stack.
26690 XXX I know the prologue shouldn't be pushing registers, but it is faster
26691 that way. */
26692
26693 static void
26694 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26695 machine_mode mode,
26696 tree type,
26697 int *pretend_size,
26698 int second_time ATTRIBUTE_UNUSED)
26699 {
26700 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26701 int nregs;
26702
26703 cfun->machine->uses_anonymous_args = 1;
26704 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26705 {
26706 nregs = pcum->aapcs_ncrn;
26707 if (nregs & 1)
26708 {
26709 int res = arm_needs_doubleword_align (mode, type);
26710 if (res < 0 && warn_psabi)
26711 inform (input_location, "parameter passing for argument of "
26712 "type %qT changed in GCC 7.1", type);
26713 else if (res > 0)
26714 nregs++;
26715 }
26716 }
26717 else
26718 nregs = pcum->nregs;
26719
26720 if (nregs < NUM_ARG_REGS)
26721 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26722 }
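
/* Worked example: for "int f (int fmt, ...)" under AAPCS only r0 is
   consumed by the named argument, so nregs == 1 and *pretend_size
   becomes (4 - 1) * 4 = 12, i.e. the prologue pushes r1-r3 so that the
   anonymous arguments are contiguous with any arguments already on the
   stack.  */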
26723
26724 /* We can't rely on the caller doing the proper promotion when
26725 using APCS or ATPCS. */
26726
26727 static bool
26728 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26729 {
26730 return !TARGET_AAPCS_BASED;
26731 }
26732
26733 static machine_mode
26734 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26735 machine_mode mode,
26736 int *punsignedp ATTRIBUTE_UNUSED,
26737 const_tree fntype ATTRIBUTE_UNUSED,
26738 int for_return ATTRIBUTE_UNUSED)
26739 {
26740 if (GET_MODE_CLASS (mode) == MODE_INT
26741 && GET_MODE_SIZE (mode) < 4)
26742 return SImode;
26743
26744 return mode;
26745 }
26746
26747
26748 static bool
26749 arm_default_short_enums (void)
26750 {
26751 return ARM_DEFAULT_SHORT_ENUMS;
26752 }
26753
26754
26755 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26756
26757 static bool
26758 arm_align_anon_bitfield (void)
26759 {
26760 return TARGET_AAPCS_BASED;
26761 }
26762
26763
26764 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26765
26766 static tree
26767 arm_cxx_guard_type (void)
26768 {
26769 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26770 }
26771
26772
26773 /* The EABI says test the least significant bit of a guard variable. */
26774
26775 static bool
26776 arm_cxx_guard_mask_bit (void)
26777 {
26778 return TARGET_AAPCS_BASED;
26779 }
26780
26781
26782 /* The EABI specifies that all array cookies are 8 bytes long. */
26783
26784 static tree
26785 arm_get_cookie_size (tree type)
26786 {
26787 tree size;
26788
26789 if (!TARGET_AAPCS_BASED)
26790 return default_cxx_get_cookie_size (type);
26791
26792 size = build_int_cst (sizetype, 8);
26793 return size;
26794 }
26795
26796
26797 /* The EABI says that array cookies should also contain the element size. */
26798
26799 static bool
26800 arm_cookie_has_size (void)
26801 {
26802 return TARGET_AAPCS_BASED;
26803 }
26804
26805
26806 /* The EABI says constructors and destructors should return a pointer to
26807 the object constructed/destroyed. */
26808
26809 static bool
26810 arm_cxx_cdtor_returns_this (void)
26811 {
26812 return TARGET_AAPCS_BASED;
26813 }
26814
26815 /* The EABI says that an inline function may never be the key
26816 method. */
26817
26818 static bool
26819 arm_cxx_key_method_may_be_inline (void)
26820 {
26821 return !TARGET_AAPCS_BASED;
26822 }
26823
26824 static void
26825 arm_cxx_determine_class_data_visibility (tree decl)
26826 {
26827 if (!TARGET_AAPCS_BASED
26828 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26829 return;
26830
26831 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26832 is exported. However, on systems without dynamic vague linkage,
26833 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26834 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26835 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26836 else
26837 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26838 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26839 }
26840
26841 static bool
26842 arm_cxx_class_data_always_comdat (void)
26843 {
26844 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26845 vague linkage if the class has no key function. */
26846 return !TARGET_AAPCS_BASED;
26847 }
26848
26849
26850 /* The EABI says __aeabi_atexit should be used to register static
26851 destructors. */
26852
26853 static bool
26854 arm_cxx_use_aeabi_atexit (void)
26855 {
26856 return TARGET_AAPCS_BASED;
26857 }
26858
26859
26860 void
26861 arm_set_return_address (rtx source, rtx scratch)
26862 {
26863 arm_stack_offsets *offsets;
26864 HOST_WIDE_INT delta;
26865 rtx addr, mem;
26866 unsigned long saved_regs;
26867
26868 offsets = arm_get_frame_offsets ();
26869 saved_regs = offsets->saved_regs_mask;
26870
26871 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26872 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26873 else
26874 {
26875 if (frame_pointer_needed)
26876 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26877 else
26878 {
26879 /* LR will be the first saved register. */
26880 delta = offsets->outgoing_args - (offsets->frame + 4);
26881
26882
26883 if (delta >= 4096)
26884 {
26885 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26886 GEN_INT (delta & ~4095)));
26887 addr = scratch;
26888 delta &= 4095;
26889 }
26890 else
26891 addr = stack_pointer_rtx;
26892
26893 addr = plus_constant (Pmode, addr, delta);
26894 }
26895
26896 /* The store needs to be marked to prevent DSE from deleting
26897 it as dead if it is based on fp. */
26898 mem = gen_frame_mem (Pmode, addr);
26899 MEM_VOLATILE_P (mem) = true;
26900 emit_move_insn (mem, source);
26901 }
26902 }
26903
26904
26905 void
26906 thumb_set_return_address (rtx source, rtx scratch)
26907 {
26908 arm_stack_offsets *offsets;
26909 HOST_WIDE_INT delta;
26910 HOST_WIDE_INT limit;
26911 int reg;
26912 rtx addr, mem;
26913 unsigned long mask;
26914
26915 emit_use (source);
26916
26917 offsets = arm_get_frame_offsets ();
26918 mask = offsets->saved_regs_mask;
26919 if (mask & (1 << LR_REGNUM))
26920 {
26921 limit = 1024;
26922 /* Find the saved regs. */
26923 if (frame_pointer_needed)
26924 {
26925 delta = offsets->soft_frame - offsets->saved_args;
26926 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26927 if (TARGET_THUMB1)
26928 limit = 128;
26929 }
26930 else
26931 {
26932 delta = offsets->outgoing_args - offsets->saved_args;
26933 reg = SP_REGNUM;
26934 }
26935 /* Allow for the stack frame. */
26936 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26937 delta -= 16;
26938 /* The link register is always the first saved register. */
26939 delta -= 4;
26940
26941 /* Construct the address. */
26942 addr = gen_rtx_REG (SImode, reg);
26943 if (delta > limit)
26944 {
26945 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26946 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26947 addr = scratch;
26948 }
26949 else
26950 addr = plus_constant (Pmode, addr, delta);
26951
26952 /* The store needs to be marked to prevent DSE from deleting
26953 it as dead if it is based on fp. */
26954 mem = gen_frame_mem (Pmode, addr);
26955 MEM_VOLATILE_P (mem) = true;
26956 emit_move_insn (mem, source);
26957 }
26958 else
26959 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26960 }
26961
26962 /* Implements target hook vector_mode_supported_p. */
26963 bool
26964 arm_vector_mode_supported_p (machine_mode mode)
26965 {
26966 /* Neon also supports V2SImode, etc. listed in the clause below. */
26967 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26968 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26969 || mode == V2DImode || mode == V8HFmode))
26970 return true;
26971
26972 if ((TARGET_NEON || TARGET_IWMMXT)
26973 && ((mode == V2SImode)
26974 || (mode == V4HImode)
26975 || (mode == V8QImode)))
26976 return true;
26977
26978 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26979 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26980 || mode == V2HAmode))
26981 return true;
26982
26983 return false;
26984 }
26985
26986 /* Implements target hook array_mode_supported_p. */
26987
26988 static bool
26989 arm_array_mode_supported_p (machine_mode mode,
26990 unsigned HOST_WIDE_INT nelems)
26991 {
26992 if (TARGET_NEON
26993 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26994 && (nelems >= 2 && nelems <= 4))
26995 return true;
26996
26997 return false;
26998 }
26999
27000 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27001 registers when autovectorizing for Neon, at least until multiple vector
27002 widths are supported properly by the middle-end. */
27003
27004 static machine_mode
27005 arm_preferred_simd_mode (scalar_mode mode)
27006 {
27007 if (TARGET_NEON)
27008 switch (mode)
27009 {
27010 case E_SFmode:
27011 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27012 case E_SImode:
27013 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27014 case E_HImode:
27015 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27016 case E_QImode:
27017 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27018 case E_DImode:
27019 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27020 return V2DImode;
27021 break;
27022
27023 default:;
27024 }
27025
27026 if (TARGET_REALLY_IWMMXT)
27027 switch (mode)
27028 {
27029 case E_SImode:
27030 return V2SImode;
27031 case E_HImode:
27032 return V4HImode;
27033 case E_QImode:
27034 return V8QImode;
27035
27036 default:;
27037 }
27038
27039 return word_mode;
27040 }
27041
27042 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27043
27044 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27045 using r0-r4 for function arguments, r7 for the stack frame, and not have
27046 enough left over to do doubleword arithmetic. For Thumb-2 all the
27047 potentially problematic instructions accept high registers so this is not
27048 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27049 that require many low registers. */
27050 static bool
27051 arm_class_likely_spilled_p (reg_class_t rclass)
27052 {
27053 if ((TARGET_THUMB1 && rclass == LO_REGS)
27054 || rclass == CC_REG)
27055 return true;
27056
27057 return false;
27058 }
27059
27060 /* Implements target hook small_register_classes_for_mode_p. */
27061 bool
27062 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27063 {
27064 return TARGET_THUMB1;
27065 }
27066
27067 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27068 ARM insns and therefore guarantee that the shift count is modulo 256.
27069 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27070 guarantee no particular behavior for out-of-range counts. */
27071
27072 static unsigned HOST_WIDE_INT
27073 arm_shift_truncation_mask (machine_mode mode)
27074 {
27075 return mode == SImode ? 255 : 0;
27076 }
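/* For illustration: since the mask above is 255 for SImode, a source-level
   "x << (n & 255)" needs no explicit AND -- the hardware already uses only
   the low byte of the count, so a count of 260 behaves as a count of 4.
   The DImode case returns 0 and promises nothing for out-of-range counts.  */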
27077
27078
27079 /* Map internal gcc register numbers to DWARF2 register numbers. */
27080
27081 unsigned int
27082 arm_dbx_register_number (unsigned int regno)
27083 {
27084 if (regno < 16)
27085 return regno;
27086
27087 if (IS_VFP_REGNUM (regno))
27088 {
27089 /* See comment in arm_dwarf_register_span. */
27090 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27091 return 64 + regno - FIRST_VFP_REGNUM;
27092 else
27093 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27094 }
27095
27096 if (IS_IWMMXT_GR_REGNUM (regno))
27097 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27098
27099 if (IS_IWMMXT_REGNUM (regno))
27100 return 112 + regno - FIRST_IWMMXT_REGNUM;
27101
27102 return DWARF_FRAME_REGISTERS;
27103 }
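/* Worked examples of the mapping above: core registers map to themselves
   (r0-r15 -> 0-15), a single-precision register such as s5 uses the legacy
   range (64 + 5 = 69), and a double-only register such as d16 (regno
   FIRST_VFP_REGNUM + 32) maps to 256 + 32 / 2 = 272.  */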
27104
27105 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27106 GCC models them as 64 32-bit registers, so we need to describe this to
27107 the DWARF generation code. Other registers can use the default. */
27108 static rtx
27109 arm_dwarf_register_span (rtx rtl)
27110 {
27111 machine_mode mode;
27112 unsigned regno;
27113 rtx parts[16];
27114 int nregs;
27115 int i;
27116
27117 regno = REGNO (rtl);
27118 if (!IS_VFP_REGNUM (regno))
27119 return NULL_RTX;
27120
27121 /* XXX FIXME: The EABI defines two VFP register ranges:
27122 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27123 256-287: D0-D31
27124 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27125 corresponding D register. Until GDB supports this, we shall use the
27126 legacy encodings. We also use these encodings for D0-D15 for
27127 compatibility with older debuggers. */
27128 mode = GET_MODE (rtl);
27129 if (GET_MODE_SIZE (mode) < 8)
27130 return NULL_RTX;
27131
27132 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27133 {
27134 nregs = GET_MODE_SIZE (mode) / 4;
27135 for (i = 0; i < nregs; i += 2)
27136 if (TARGET_BIG_END)
27137 {
27138 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27139 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27140 }
27141 else
27142 {
27143 parts[i] = gen_rtx_REG (SImode, regno + i);
27144 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27145 }
27146 }
27147 else
27148 {
27149 nregs = GET_MODE_SIZE (mode) / 8;
27150 for (i = 0; i < nregs; i++)
27151 parts[i] = gen_rtx_REG (DImode, regno + i);
27152 }
27153
27154 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27155 }
27156
27157 #if ARM_UNWIND_INFO
27158 /* Emit unwind directives for a store-multiple instruction or stack pointer
27159 push during alignment.
27160 These should only ever be generated by the function prologue code, so
27161 expect them to have a particular form.
27162 The store-multiple instruction sometimes pushes pc as the last register,
27163 although it should not be tracked in the unwind information, or for -Os
27164 sometimes pushes some dummy registers before the first register that needs
27165 to be tracked in the unwind information; such dummy registers are only there
27166 to avoid a separate stack adjustment and will not be restored in the
27167 epilogue. */
27168
27169 static void
27170 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27171 {
27172 int i;
27173 HOST_WIDE_INT offset;
27174 HOST_WIDE_INT nregs;
27175 int reg_size;
27176 unsigned reg;
27177 unsigned lastreg;
27178 unsigned padfirst = 0, padlast = 0;
27179 rtx e;
27180
27181 e = XVECEXP (p, 0, 0);
27182 gcc_assert (GET_CODE (e) == SET);
27183
27184 /* First insn will adjust the stack pointer. */
27185 gcc_assert (GET_CODE (e) == SET
27186 && REG_P (SET_DEST (e))
27187 && REGNO (SET_DEST (e)) == SP_REGNUM
27188 && GET_CODE (SET_SRC (e)) == PLUS);
27189
27190 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27191 nregs = XVECLEN (p, 0) - 1;
27192 gcc_assert (nregs);
27193
27194 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27195 if (reg < 16)
27196 {
27197 /* For -Os dummy registers can be pushed at the beginning to
27198 avoid separate stack pointer adjustment. */
27199 e = XVECEXP (p, 0, 1);
27200 e = XEXP (SET_DEST (e), 0);
27201 if (GET_CODE (e) == PLUS)
27202 padfirst = INTVAL (XEXP (e, 1));
27203 gcc_assert (padfirst == 0 || optimize_size);
27204 /* The function prologue may also push pc, but not annotate it as it is
27205 never restored. We turn this into a stack pointer adjustment. */
27206 e = XVECEXP (p, 0, nregs);
27207 e = XEXP (SET_DEST (e), 0);
27208 if (GET_CODE (e) == PLUS)
27209 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27210 else
27211 padlast = offset - 4;
27212 gcc_assert (padlast == 0 || padlast == 4);
27213 if (padlast == 4)
27214 fprintf (asm_out_file, "\t.pad #4\n");
27215 reg_size = 4;
27216 fprintf (asm_out_file, "\t.save {");
27217 }
27218 else if (IS_VFP_REGNUM (reg))
27219 {
27220 reg_size = 8;
27221 fprintf (asm_out_file, "\t.vsave {");
27222 }
27223 else
27224 /* Unknown register type. */
27225 gcc_unreachable ();
27226
27227 /* If the stack increment doesn't match the size of the saved registers,
27228 something has gone horribly wrong. */
27229 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27230
27231 offset = padfirst;
27232 lastreg = 0;
27233 /* The remaining insns will describe the stores. */
27234 for (i = 1; i <= nregs; i++)
27235 {
27236 /* Expect (set (mem <addr>) (reg)).
27237 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27238 e = XVECEXP (p, 0, i);
27239 gcc_assert (GET_CODE (e) == SET
27240 && MEM_P (SET_DEST (e))
27241 && REG_P (SET_SRC (e)));
27242
27243 reg = REGNO (SET_SRC (e));
27244 gcc_assert (reg >= lastreg);
27245
27246 if (i != 1)
27247 fprintf (asm_out_file, ", ");
27248 /* We can't use %r for vfp because we need to use the
27249 double precision register names. */
27250 if (IS_VFP_REGNUM (reg))
27251 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27252 else
27253 asm_fprintf (asm_out_file, "%r", reg);
27254
27255 if (flag_checking)
27256 {
27257 /* Check that the addresses are consecutive. */
27258 e = XEXP (SET_DEST (e), 0);
27259 if (GET_CODE (e) == PLUS)
27260 gcc_assert (REG_P (XEXP (e, 0))
27261 && REGNO (XEXP (e, 0)) == SP_REGNUM
27262 && CONST_INT_P (XEXP (e, 1))
27263 && offset == INTVAL (XEXP (e, 1)));
27264 else
27265 gcc_assert (i == 1
27266 && REG_P (e)
27267 && REGNO (e) == SP_REGNUM);
27268 offset += reg_size;
27269 }
27270 }
27271 fprintf (asm_out_file, "}\n");
27272 if (padfirst)
27273 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27274 }
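/* For illustration, a prologue "push {r4, r5, lr}" is annotated above as
   "\t.save {r4, r5, lr}" and a "vpush {d8, d9}" as "\t.vsave {d8, d9}",
   while any dummy slots pushed before or after the tracked registers become
   "\t.pad #N" directives.  */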
27275
27276 /* Emit unwind directives for a SET. */
27277
27278 static void
27279 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27280 {
27281 rtx e0;
27282 rtx e1;
27283 unsigned reg;
27284
27285 e0 = XEXP (p, 0);
27286 e1 = XEXP (p, 1);
27287 switch (GET_CODE (e0))
27288 {
27289 case MEM:
27290 /* Pushing a single register. */
27291 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27292 || !REG_P (XEXP (XEXP (e0, 0), 0))
27293 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27294 abort ();
27295
27296 asm_fprintf (asm_out_file, "\t.save ");
27297 if (IS_VFP_REGNUM (REGNO (e1)))
27298 asm_fprintf(asm_out_file, "{d%d}\n",
27299 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27300 else
27301 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27302 break;
27303
27304 case REG:
27305 if (REGNO (e0) == SP_REGNUM)
27306 {
27307 /* A stack increment. */
27308 if (GET_CODE (e1) != PLUS
27309 || !REG_P (XEXP (e1, 0))
27310 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27311 || !CONST_INT_P (XEXP (e1, 1)))
27312 abort ();
27313
27314 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27315 -INTVAL (XEXP (e1, 1)));
27316 }
27317 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27318 {
27319 HOST_WIDE_INT offset;
27320
27321 if (GET_CODE (e1) == PLUS)
27322 {
27323 if (!REG_P (XEXP (e1, 0))
27324 || !CONST_INT_P (XEXP (e1, 1)))
27325 abort ();
27326 reg = REGNO (XEXP (e1, 0));
27327 offset = INTVAL (XEXP (e1, 1));
27328 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27329 HARD_FRAME_POINTER_REGNUM, reg,
27330 offset);
27331 }
27332 else if (REG_P (e1))
27333 {
27334 reg = REGNO (e1);
27335 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27336 HARD_FRAME_POINTER_REGNUM, reg);
27337 }
27338 else
27339 abort ();
27340 }
27341 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27342 {
27343 /* Move from sp to reg. */
27344 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27345 }
27346 else if (GET_CODE (e1) == PLUS
27347 && REG_P (XEXP (e1, 0))
27348 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27349 && CONST_INT_P (XEXP (e1, 1)))
27350 {
27351 /* Set reg to offset from sp. */
27352 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27353 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27354 }
27355 else
27356 abort ();
27357 break;
27358
27359 default:
27360 abort ();
27361 }
27362 }
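/* For illustration, typical directives produced above: a single push such as
   "str r4, [sp, #-4]!" yields "\t.save {r4}", a stack adjustment
   "sub sp, sp, #16" yields "\t.pad #16", and establishing the frame pointer
   from sp yields a "\t.setfp ..." directive.  */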
27363
27364
27365 /* Emit unwind directives for the given insn. */
27366
27367 static void
27368 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27369 {
27370 rtx note, pat;
27371 bool handled_one = false;
27372
27373 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27374 return;
27375
27376 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27377 && (TREE_NOTHROW (current_function_decl)
27378 || crtl->all_throwers_are_sibcalls))
27379 return;
27380
27381 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27382 return;
27383
27384 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27385 {
27386 switch (REG_NOTE_KIND (note))
27387 {
27388 case REG_FRAME_RELATED_EXPR:
27389 pat = XEXP (note, 0);
27390 goto found;
27391
27392 case REG_CFA_REGISTER:
27393 pat = XEXP (note, 0);
27394 if (pat == NULL)
27395 {
27396 pat = PATTERN (insn);
27397 if (GET_CODE (pat) == PARALLEL)
27398 pat = XVECEXP (pat, 0, 0);
27399 }
27400
27401 /* Only emitted for IS_STACKALIGN re-alignment. */
27402 {
27403 rtx dest, src;
27404 unsigned reg;
27405
27406 src = SET_SRC (pat);
27407 dest = SET_DEST (pat);
27408
27409 gcc_assert (src == stack_pointer_rtx);
27410 reg = REGNO (dest);
27411 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27412 reg + 0x90, reg);
27413 }
27414 handled_one = true;
27415 break;
27416
27417 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27418 to get correct dwarf information for shrink-wrap. We should not
27419 emit unwind information for it because these are used either for
27420 pretend arguments or notes to adjust sp and restore registers from
27421 stack. */
27422 case REG_CFA_DEF_CFA:
27423 case REG_CFA_ADJUST_CFA:
27424 case REG_CFA_RESTORE:
27425 return;
27426
27427 case REG_CFA_EXPRESSION:
27428 case REG_CFA_OFFSET:
27429 /* ??? Only handling here what we actually emit. */
27430 gcc_unreachable ();
27431
27432 default:
27433 break;
27434 }
27435 }
27436 if (handled_one)
27437 return;
27438 pat = PATTERN (insn);
27439 found:
27440
27441 switch (GET_CODE (pat))
27442 {
27443 case SET:
27444 arm_unwind_emit_set (asm_out_file, pat);
27445 break;
27446
27447 case SEQUENCE:
27448 /* Store multiple. */
27449 arm_unwind_emit_sequence (asm_out_file, pat);
27450 break;
27451
27452 default:
27453 abort();
27454 }
27455 }
27456
27457
27458 /* Output a reference from a function exception table to the type_info
27459 object X. The EABI specifies that the symbol should be relocated by
27460 an R_ARM_TARGET2 relocation. */
27461
27462 static bool
27463 arm_output_ttype (rtx x)
27464 {
27465 fputs ("\t.word\t", asm_out_file);
27466 output_addr_const (asm_out_file, x);
27467 /* Use special relocations for symbol references. */
27468 if (!CONST_INT_P (x))
27469 fputs ("(TARGET2)", asm_out_file);
27470 fputc ('\n', asm_out_file);
27471
27472 return TRUE;
27473 }
27474
27475 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27476
27477 static void
27478 arm_asm_emit_except_personality (rtx personality)
27479 {
27480 fputs ("\t.personality\t", asm_out_file);
27481 output_addr_const (asm_out_file, personality);
27482 fputc ('\n', asm_out_file);
27483 }
27484 #endif /* ARM_UNWIND_INFO */
27485
27486 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27487
27488 static void
27489 arm_asm_init_sections (void)
27490 {
27491 #if ARM_UNWIND_INFO
27492 exception_section = get_unnamed_section (0, output_section_asm_op,
27493 "\t.handlerdata");
27494 #endif /* ARM_UNWIND_INFO */
27495
27496 #ifdef OBJECT_FORMAT_ELF
27497 if (target_pure_code)
27498 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27499 #endif
27500 }
27501
27502 /* Output unwind directives for the start/end of a function. */
27503
27504 void
27505 arm_output_fn_unwind (FILE * f, bool prologue)
27506 {
27507 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27508 return;
27509
27510 if (prologue)
27511 fputs ("\t.fnstart\n", f);
27512 else
27513 {
27514 /* If this function will never be unwound, then mark it as such.
27515 The same condition is used in arm_unwind_emit to suppress
27516 the frame annotations. */
27517 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27518 && (TREE_NOTHROW (current_function_decl)
27519 || crtl->all_throwers_are_sibcalls))
27520 fputs("\t.cantunwind\n", f);
27521
27522 fputs ("\t.fnend\n", f);
27523 }
27524 }
27525
27526 static bool
27527 arm_emit_tls_decoration (FILE *fp, rtx x)
27528 {
27529 enum tls_reloc reloc;
27530 rtx val;
27531
27532 val = XVECEXP (x, 0, 0);
27533 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27534
27535 output_addr_const (fp, val);
27536
27537 switch (reloc)
27538 {
27539 case TLS_GD32:
27540 fputs ("(tlsgd)", fp);
27541 break;
27542 case TLS_LDM32:
27543 fputs ("(tlsldm)", fp);
27544 break;
27545 case TLS_LDO32:
27546 fputs ("(tlsldo)", fp);
27547 break;
27548 case TLS_IE32:
27549 fputs ("(gottpoff)", fp);
27550 break;
27551 case TLS_LE32:
27552 fputs ("(tpoff)", fp);
27553 break;
27554 case TLS_DESCSEQ:
27555 fputs ("(tlsdesc)", fp);
27556 break;
27557 default:
27558 gcc_unreachable ();
27559 }
27560
27561 switch (reloc)
27562 {
27563 case TLS_GD32:
27564 case TLS_LDM32:
27565 case TLS_IE32:
27566 case TLS_DESCSEQ:
27567 fputs (" + (. - ", fp);
27568 output_addr_const (fp, XVECEXP (x, 0, 2));
27569 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27570 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27571 output_addr_const (fp, XVECEXP (x, 0, 3));
27572 fputc (')', fp);
27573 break;
27574 default:
27575 break;
27576 }
27577
27578 return TRUE;
27579 }
27580
27581 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27582
27583 static void
27584 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27585 {
27586 gcc_assert (size == 4);
27587 fputs ("\t.word\t", file);
27588 output_addr_const (file, x);
27589 fputs ("(tlsldo)", file);
27590 }
27591
27592 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27593
27594 static bool
27595 arm_output_addr_const_extra (FILE *fp, rtx x)
27596 {
27597 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27598 return arm_emit_tls_decoration (fp, x);
27599 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27600 {
27601 char label[256];
27602 int labelno = INTVAL (XVECEXP (x, 0, 0));
27603
27604 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27605 assemble_name_raw (fp, label);
27606
27607 return TRUE;
27608 }
27609 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27610 {
27611 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27612 if (GOT_PCREL)
27613 fputs ("+.", fp);
27614 fputs ("-(", fp);
27615 output_addr_const (fp, XVECEXP (x, 0, 0));
27616 fputc (')', fp);
27617 return TRUE;
27618 }
27619 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27620 {
27621 output_addr_const (fp, XVECEXP (x, 0, 0));
27622 if (GOT_PCREL)
27623 fputs ("+.", fp);
27624 fputs ("-(", fp);
27625 output_addr_const (fp, XVECEXP (x, 0, 1));
27626 fputc (')', fp);
27627 return TRUE;
27628 }
27629 else if (GET_CODE (x) == CONST_VECTOR)
27630 return arm_emit_vector_const (fp, x);
27631
27632 return FALSE;
27633 }
27634
27635 /* Output assembly for a shift instruction.
27636 SET_FLAGS determines how the instruction modifies the condition codes.
27637 0 - Do not set condition codes.
27638 1 - Set condition codes.
27639 2 - Use smallest instruction. */
27640 const char *
27641 arm_output_shift(rtx * operands, int set_flags)
27642 {
27643 char pattern[100];
27644 static const char flag_chars[3] = {'?', '.', '!'};
27645 const char *shift;
27646 HOST_WIDE_INT val;
27647 char c;
27648
27649 c = flag_chars[set_flags];
27650 shift = shift_op(operands[3], &val);
27651 if (shift)
27652 {
27653 if (val != -1)
27654 operands[2] = GEN_INT(val);
27655 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27656 }
27657 else
27658 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27659
27660 output_asm_insn (pattern, operands);
27661 return "";
27662 }
27663
27664 /* Output assembly for a WMMX immediate shift instruction. */
27665 const char *
27666 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27667 {
27668 int shift = INTVAL (operands[2]);
27669 char templ[50];
27670 machine_mode opmode = GET_MODE (operands[0]);
27671
27672 gcc_assert (shift >= 0);
27673
27674 /* If the shift value is wider than the element width (> 63 for the D qualifier,
27675 > 31 for W, > 15 for H), WROR/WSRA below use #32 shifts; others clear the destination. */
27676 if (((opmode == V4HImode) && (shift > 15))
27677 || ((opmode == V2SImode) && (shift > 31))
27678 || ((opmode == DImode) && (shift > 63)))
27679 {
27680 if (wror_or_wsra)
27681 {
27682 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27683 output_asm_insn (templ, operands);
27684 if (opmode == DImode)
27685 {
27686 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27687 output_asm_insn (templ, operands);
27688 }
27689 }
27690 else
27691 {
27692 /* The destination register will contain all zeros. */
27693 sprintf (templ, "wzero\t%%0");
27694 output_asm_insn (templ, operands);
27695 }
27696 return "";
27697 }
27698
27699 if ((opmode == DImode) && (shift > 32))
27700 {
27701 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27702 output_asm_insn (templ, operands);
27703 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27704 output_asm_insn (templ, operands);
27705 }
27706 else
27707 {
27708 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27709 output_asm_insn (templ, operands);
27710 }
27711 return "";
27712 }
27713
27714 /* Output assembly for a WMMX tinsr instruction. */
27715 const char *
27716 arm_output_iwmmxt_tinsr (rtx *operands)
27717 {
27718 int mask = INTVAL (operands[3]);
27719 int i;
27720 char templ[50];
27721 int units = mode_nunits[GET_MODE (operands[0])];
27722 gcc_assert ((mask & (mask - 1)) == 0);
27723 for (i = 0; i < units; ++i)
27724 {
27725 if ((mask & 0x01) == 1)
27726 {
27727 break;
27728 }
27729 mask >>= 1;
27730 }
27731 gcc_assert (i < units);
27732 {
27733 switch (GET_MODE (operands[0]))
27734 {
27735 case E_V8QImode:
27736 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27737 break;
27738 case E_V4HImode:
27739 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27740 break;
27741 case E_V2SImode:
27742 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27743 break;
27744 default:
27745 gcc_unreachable ();
27746 break;
27747 }
27748 output_asm_insn (templ, operands);
27749 }
27750 return "";
27751 }
27752
27753 /* Output a Thumb-1 casesi dispatch sequence. */
27754 const char *
27755 thumb1_output_casesi (rtx *operands)
27756 {
27757 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27758
27759 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27760
27761 switch (GET_MODE(diff_vec))
27762 {
27763 case E_QImode:
27764 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27765 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27766 case E_HImode:
27767 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27768 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27769 case E_SImode:
27770 return "bl\t%___gnu_thumb1_case_si";
27771 default:
27772 gcc_unreachable ();
27773 }
27774 }
27775
27776 /* Output a Thumb-2 casesi instruction. */
27777 const char *
27778 thumb2_output_casesi (rtx *operands)
27779 {
27780 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27781
27782 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27783
27784 output_asm_insn ("cmp\t%0, %1", operands);
27785 output_asm_insn ("bhi\t%l3", operands);
27786 switch (GET_MODE(diff_vec))
27787 {
27788 case E_QImode:
27789 return "tbb\t[%|pc, %0]";
27790 case E_HImode:
27791 return "tbh\t[%|pc, %0, lsl #1]";
27792 case E_SImode:
27793 if (flag_pic)
27794 {
27795 output_asm_insn ("adr\t%4, %l2", operands);
27796 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27797 output_asm_insn ("add\t%4, %4, %5", operands);
27798 return "bx\t%4";
27799 }
27800 else
27801 {
27802 output_asm_insn ("adr\t%4, %l2", operands);
27803 return "ldr\t%|pc, [%4, %0, lsl #2]";
27804 }
27805 default:
27806 gcc_unreachable ();
27807 }
27808 }
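/* For illustration, with a QImode dispatch table the sequence emitted above
   is "cmp %0, %1", "bhi %l3" (branch to the default label) followed by
   "tbb [pc, %0]"; HImode uses "tbh" and SImode falls back to an
   adr/ldr-based jump.  */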
27809
27810 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27811 per-core tuning structs. */
27812 static int
27813 arm_issue_rate (void)
27814 {
27815 return current_tune->issue_rate;
27816 }
27817
27818 /* Return how many instructions the scheduler should look ahead to choose
27819 the best one. */
27820 static int
27821 arm_first_cycle_multipass_dfa_lookahead (void)
27822 {
27823 int issue_rate = arm_issue_rate ();
27824
27825 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27826 }
27827
27828 /* Enable modeling of L2 auto-prefetcher. */
27829 static int
27830 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27831 {
27832 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27833 }
27834
27835 const char *
27836 arm_mangle_type (const_tree type)
27837 {
27838 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27839 has to be mangled as if it is in the "std" namespace. */
27840 if (TARGET_AAPCS_BASED
27841 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27842 return "St9__va_list";
27843
27844 /* Half-precision float. */
27845 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27846 return "Dh";
27847
27848 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27849 builtin type. */
27850 if (TYPE_NAME (type) != NULL)
27851 return arm_mangle_builtin_type (type);
27852
27853 /* Use the default mangling. */
27854 return NULL;
27855 }
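/* For illustration: on an AAPCS target the builtin va_list mangles as
   "St9__va_list" (i.e. as std::__va_list), and the half-precision __fp16
   type mangles as "Dh"; anything else defers to the Neon builtin mangling
   or the language default.  */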
27856
27857 /* Order of allocation of core registers for Thumb: this allocation is
27858 written over the corresponding initial entries of the array
27859 initialized with REG_ALLOC_ORDER. We allocate all low registers
27860 first. Saving and restoring a low register is usually cheaper than
27861 using a call-clobbered high register. */
27862
27863 static const int thumb_core_reg_alloc_order[] =
27864 {
27865 3, 2, 1, 0, 4, 5, 6, 7,
27866 12, 14, 8, 9, 10, 11
27867 };
27868
27869 /* Adjust register allocation order when compiling for Thumb. */
27870
27871 void
27872 arm_order_regs_for_local_alloc (void)
27873 {
27874 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27875 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27876 if (TARGET_THUMB)
27877 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27878 sizeof (thumb_core_reg_alloc_order));
27879 }
27880
27881 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27882
27883 bool
27884 arm_frame_pointer_required (void)
27885 {
27886 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27887 return true;
27888
27889 /* If the function receives nonlocal gotos, it needs to save the frame
27890 pointer in the nonlocal_goto_save_area object. */
27891 if (cfun->has_nonlocal_label)
27892 return true;
27893
27894 /* The frame pointer is required for non-leaf APCS frames. */
27895 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27896 return true;
27897
27898 /* If we are probing the stack in the prologue, we will have a faulting
27899 instruction prior to the stack adjustment and this requires a frame
27900 pointer if we want to catch the exception using the EABI unwinder. */
27901 if (!IS_INTERRUPT (arm_current_func_type ())
27902 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27903 || flag_stack_clash_protection)
27904 && arm_except_unwind_info (&global_options) == UI_TARGET
27905 && cfun->can_throw_non_call_exceptions)
27906 {
27907 HOST_WIDE_INT size = get_frame_size ();
27908
27909 /* That's irrelevant if there is no stack adjustment. */
27910 if (size <= 0)
27911 return false;
27912
27913 /* That's relevant only if there is a stack probe. */
27914 if (crtl->is_leaf && !cfun->calls_alloca)
27915 {
27916 /* We don't have the final size of the frame so adjust. */
27917 size += 32 * UNITS_PER_WORD;
27918 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27919 return true;
27920 }
27921 else
27922 return true;
27923 }
27924
27925 return false;
27926 }
27927
27928 /* Thumb-1 is the only target without conditional execution, so return true
27929 unless the target is Thumb-1. */
27930 static bool
27931 arm_have_conditional_execution (void)
27932 {
27933 return !TARGET_THUMB1;
27934 }
27935
27936 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27937 static HOST_WIDE_INT
27938 arm_vector_alignment (const_tree type)
27939 {
27940 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27941
27942 if (TARGET_AAPCS_BASED)
27943 align = MIN (align, 64);
27944
27945 return align;
27946 }
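/* For illustration: a 128-bit Neon vector type has TYPE_SIZE 128, but on an
   AAPCS target the hook above caps its alignment at 64 bits as the ABI
   requires, while non-AAPCS targets keep the natural 128-bit alignment.  */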
27947
27948 static unsigned int
27949 arm_autovectorize_vector_sizes (void)
27950 {
27951 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27952 }
27953
27954 static bool
27955 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27956 {
27957 /* Vectors which aren't in packed structures will not be less aligned than
27958 the natural alignment of their element type, so this is safe. */
27959 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27960 return !is_packed;
27961
27962 return default_builtin_vector_alignment_reachable (type, is_packed);
27963 }
27964
27965 static bool
27966 arm_builtin_support_vector_misalignment (machine_mode mode,
27967 const_tree type, int misalignment,
27968 bool is_packed)
27969 {
27970 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27971 {
27972 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27973
27974 if (is_packed)
27975 return align == 1;
27976
27977 /* If the misalignment is unknown, we should be able to handle the access
27978 so long as it is not to a member of a packed data structure. */
27979 if (misalignment == -1)
27980 return true;
27981
27982 /* Return true if the misalignment is a multiple of the natural alignment
27983 of the vector's element type. This is probably always going to be
27984 true in practice, since we've already established that this isn't a
27985 packed access. */
27986 return ((misalignment % align) == 0);
27987 }
27988
27989 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27990 is_packed);
27991 }
27992
27993 static void
27994 arm_conditional_register_usage (void)
27995 {
27996 int regno;
27997
27998 if (TARGET_THUMB1 && optimize_size)
27999 {
28000 /* When optimizing for size on Thumb-1, it's better not
28001 to use the HI regs, because of the overhead of
28002 stacking them. */
28003 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28004 fixed_regs[regno] = call_used_regs[regno] = 1;
28005 }
28006
28007 /* The link register can be clobbered by any branch insn,
28008 but we have no way to track that at present, so mark
28009 it as unavailable. */
28010 if (TARGET_THUMB1)
28011 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28012
28013 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28014 {
28015 /* VFPv3 registers are disabled when earlier VFP
28016 versions are selected due to the definition of
28017 LAST_VFP_REGNUM. */
28018 for (regno = FIRST_VFP_REGNUM;
28019 regno <= LAST_VFP_REGNUM; ++ regno)
28020 {
28021 fixed_regs[regno] = 0;
28022 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28023 || regno >= FIRST_VFP_REGNUM + 32;
28024 }
28025 }
28026
28027 if (TARGET_REALLY_IWMMXT)
28028 {
28029 regno = FIRST_IWMMXT_GR_REGNUM;
28030 /* The 2002/10/09 revision of the XScale ABI has wCG0
28031 and wCG1 as call-preserved registers. The 2002/11/21
28032 revision changed this so that all wCG registers are
28033 scratch registers. */
28034 for (regno = FIRST_IWMMXT_GR_REGNUM;
28035 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28036 fixed_regs[regno] = 0;
28037 /* The XScale ABI has wR0 - wR9 as scratch registers,
28038 the rest as call-preserved registers. */
28039 for (regno = FIRST_IWMMXT_REGNUM;
28040 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28041 {
28042 fixed_regs[regno] = 0;
28043 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28044 }
28045 }
28046
28047 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28048 {
28049 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28050 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28051 }
28052 else if (TARGET_APCS_STACK)
28053 {
28054 fixed_regs[10] = 1;
28055 call_used_regs[10] = 1;
28056 }
28057 /* -mcaller-super-interworking reserves r11 for calls to
28058 _interwork_r11_call_via_rN(). Making the register global
28059 is an easy way of ensuring that it remains valid for all
28060 calls. */
28061 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28062 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28063 {
28064 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28065 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28066 if (TARGET_CALLER_INTERWORKING)
28067 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28068 }
28069 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28070 }
28071
28072 static reg_class_t
28073 arm_preferred_rename_class (reg_class_t rclass)
28074 {
28075 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28076 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
28077 and code size can be reduced. */
28078 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28079 return LO_REGS;
28080 else
28081 return NO_REGS;
28082 }
28083
28084 /* Compute the attribute "length" of insn "*push_multi".
28085 So this function MUST be kept in sync with that insn pattern. */
28086 int
28087 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28088 {
28089 int i, regno, hi_reg;
28090 int num_saves = XVECLEN (parallel_op, 0);
28091
28092 /* ARM mode. */
28093 if (TARGET_ARM)
28094 return 4;
28095 /* Thumb1 mode. */
28096 if (TARGET_THUMB1)
28097 return 2;
28098
28099 /* Thumb2 mode. */
28100 regno = REGNO (first_op);
28101 /* For PUSH/STM in Thumb-2 mode, a 16-bit encoding can be used if the register
28102 list fits in 8 bits, i.e. all registers in the list are in LO_REGS
28103 (R0-R7). If any register in HI_REGS is used, a 32-bit encoding is
28104 required, with the one exception that PUSH may include LR and still use
28105 the 16-bit encoding. */
28106 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28107 for (i = 1; i < num_saves && !hi_reg; i++)
28108 {
28109 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28110 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28111 }
28112
28113 if (!hi_reg)
28114 return 2;
28115 return 4;
28116 }
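/* Worked examples for the function above: in Thumb-2, "push {r4, r5, lr}"
   uses only low registers plus LR, so the length is 2 bytes, while
   "push {r4, r8}" contains a high register other than LR and needs the
   4-byte encoding; ARM mode is always 4 and Thumb-1 always 2.  */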
28117
28118 /* Compute the attribute "length" of insn. Currently, this function is used
28119 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28120 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28121 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28122 true if OPERANDS contains an insn which explicitly updates the base register. */
28123
28124 int
28125 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28126 {
28127 /* ARM mode. */
28128 if (TARGET_ARM)
28129 return 4;
28130 /* Thumb1 mode. */
28131 if (TARGET_THUMB1)
28132 return 2;
28133
28134 rtx parallel_op = operands[0];
28135 /* Initialize to the number of elements in the PARALLEL. */
28136 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28137 /* Initialize to the base register number. */
28138 unsigned regno = REGNO (operands[1]);
28139 /* Skip return and write back pattern.
28140 We only need register pop pattern for later analysis. */
28141 unsigned first_indx = 0;
28142 first_indx += return_pc ? 1 : 0;
28143 first_indx += write_back_p ? 1 : 0;
28144
28145 /* A pop operation can be done through LDM or POP. If the base register is SP
28146 and write back is enabled, then an LDM is an alias of POP. */
28147 bool pop_p = (regno == SP_REGNUM && write_back_p);
28148 bool ldm_p = !pop_p;
28149
28150 /* Check base register for LDM. */
28151 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28152 return 4;
28153
28154 /* Check each register in the list. */
28155 for (; indx >= first_indx; indx--)
28156 {
28157 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28158 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28159 comment in arm_attr_length_push_multi. */
28160 if (REGNO_REG_CLASS (regno) == HI_REGS
28161 && (regno != PC_REGNUM || ldm_p))
28162 return 4;
28163 }
28164
28165 return 2;
28166 }
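/* Worked examples for the function above: "pop {r4, r5, pc}" (SP base with
   write back) is a 16-bit POP, so 2 bytes -- PC is the one high register
   allowed there -- whereas an LDM with a high base register such as
   "ldm r8, {r4, r5}" needs the 4-byte encoding.  */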
28167
28168 /* Compute the number of instructions emitted by output_move_double. */
28169 int
28170 arm_count_output_move_double_insns (rtx *operands)
28171 {
28172 int count;
28173 rtx ops[2];
28174 /* output_move_double may modify the operands array, so call it
28175 here on a copy of the array. */
28176 ops[0] = operands[0];
28177 ops[1] = operands[1];
28178 output_move_double (ops, false, &count);
28179 return count;
28180 }
28181
28182 int
28183 vfp3_const_double_for_fract_bits (rtx operand)
28184 {
28185 REAL_VALUE_TYPE r0;
28186
28187 if (!CONST_DOUBLE_P (operand))
28188 return 0;
28189
28190 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28191 if (exact_real_inverse (DFmode, &r0)
28192 && !REAL_VALUE_NEGATIVE (r0))
28193 {
28194 if (exact_real_truncate (DFmode, &r0))
28195 {
28196 HOST_WIDE_INT value = real_to_integer (&r0);
28197 value = value & 0xffffffff;
28198 if ((value != 0) && ( (value & (value - 1)) == 0))
28199 {
28200 int ret = exact_log2 (value);
28201 gcc_assert (IN_RANGE (ret, 0, 31));
28202 return ret;
28203 }
28204 }
28205 }
28206 return 0;
28207 }
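/* Worked example for the function above: for the constant 0.125 the exact
   inverse is 8.0, which truncates exactly to 8 = 2**3, so 3 is returned
   (the number of fractional bits for a fixed-point vcvt); a value such as
   0.3 has no exactly representable inverse and yields 0.  */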
28208
28209 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28210 log2 is in [1, 32], return that log2. Otherwise return -1.
28211 This is used in the patterns for vcvt.s32.f32 floating-point to
28212 fixed-point conversions. */
28213
28214 int
28215 vfp3_const_double_for_bits (rtx x)
28216 {
28217 const REAL_VALUE_TYPE *r;
28218
28219 if (!CONST_DOUBLE_P (x))
28220 return -1;
28221
28222 r = CONST_DOUBLE_REAL_VALUE (x);
28223
28224 if (REAL_VALUE_NEGATIVE (*r)
28225 || REAL_VALUE_ISNAN (*r)
28226 || REAL_VALUE_ISINF (*r)
28227 || !real_isinteger (r, SFmode))
28228 return -1;
28229
28230 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28231
28232 /* The exact_log2 above will have returned -1 if this is
28233 not an exact log2. */
28234 if (!IN_RANGE (hwint, 1, 32))
28235 return -1;
28236
28237 return hwint;
28238 }
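/* Worked examples for the function above: 65536.0 is a positive integral
   power of two with log2 = 16, which lies in [1, 32], so 16 is returned;
   1.0 (log2 = 0) and 3.0 (not a power of two) both return -1.  */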
28239
28240 \f
28241 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28242
28243 static void
28244 arm_pre_atomic_barrier (enum memmodel model)
28245 {
28246 if (need_atomic_barrier_p (model, true))
28247 emit_insn (gen_memory_barrier ());
28248 }
28249
28250 static void
28251 arm_post_atomic_barrier (enum memmodel model)
28252 {
28253 if (need_atomic_barrier_p (model, false))
28254 emit_insn (gen_memory_barrier ());
28255 }
28256
28257 /* Emit the load-exclusive and store-exclusive instructions.
28258 Use acquire and release versions if necessary. */
28259
28260 static void
28261 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28262 {
28263 rtx (*gen) (rtx, rtx);
28264
28265 if (acq)
28266 {
28267 switch (mode)
28268 {
28269 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28270 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28271 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28272 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28273 default:
28274 gcc_unreachable ();
28275 }
28276 }
28277 else
28278 {
28279 switch (mode)
28280 {
28281 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28282 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28283 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28284 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28285 default:
28286 gcc_unreachable ();
28287 }
28288 }
28289
28290 emit_insn (gen (rval, mem));
28291 }
28292
28293 static void
28294 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28295 rtx mem, bool rel)
28296 {
28297 rtx (*gen) (rtx, rtx, rtx);
28298
28299 if (rel)
28300 {
28301 switch (mode)
28302 {
28303 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28304 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28305 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28306 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28307 default:
28308 gcc_unreachable ();
28309 }
28310 }
28311 else
28312 {
28313 switch (mode)
28314 {
28315 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28316 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28317 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28318 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28319 default:
28320 gcc_unreachable ();
28321 }
28322 }
28323
28324 emit_insn (gen (bval, rval, mem));
28325 }
28326
28327 /* Mark the previous jump instruction as unlikely. */
28328
28329 static void
28330 emit_unlikely_jump (rtx insn)
28331 {
28332 rtx_insn *jump = emit_jump_insn (insn);
28333 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28334 }
28335
28336 /* Expand a compare and swap pattern. */
28337
28338 void
28339 arm_expand_compare_and_swap (rtx operands[])
28340 {
28341 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28342 machine_mode mode;
28343 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28344
28345 bval = operands[0];
28346 rval = operands[1];
28347 mem = operands[2];
28348 oldval = operands[3];
28349 newval = operands[4];
28350 is_weak = operands[5];
28351 mod_s = operands[6];
28352 mod_f = operands[7];
28353 mode = GET_MODE (mem);
28354
28355 /* Normally the succ memory model must be stronger than fail, but in the
28356 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28357 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28358
28359 if (TARGET_HAVE_LDACQ
28360 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28361 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28362 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28363
28364 switch (mode)
28365 {
28366 case E_QImode:
28367 case E_HImode:
28368 /* For narrow modes, we're going to perform the comparison in SImode,
28369 so do the zero-extension now. */
28370 rval = gen_reg_rtx (SImode);
28371 oldval = convert_modes (SImode, mode, oldval, true);
28372 /* FALLTHRU */
28373
28374 case E_SImode:
28375 /* Force the value into a register if needed. We waited until after
28376 the zero-extension above to do this properly. */
28377 if (!arm_add_operand (oldval, SImode))
28378 oldval = force_reg (SImode, oldval);
28379 break;
28380
28381 case E_DImode:
28382 if (!cmpdi_operand (oldval, mode))
28383 oldval = force_reg (mode, oldval);
28384 break;
28385
28386 default:
28387 gcc_unreachable ();
28388 }
28389
28390 if (TARGET_THUMB1)
28391 {
28392 switch (mode)
28393 {
28394 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28395 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28396 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28397 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28398 default:
28399 gcc_unreachable ();
28400 }
28401 }
28402 else
28403 {
28404 switch (mode)
28405 {
28406 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28407 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28408 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28409 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28410 default:
28411 gcc_unreachable ();
28412 }
28413 }
28414
28415 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28416 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28417
28418 if (mode == QImode || mode == HImode)
28419 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28420
28421 /* In all cases, we arrange for success to be signaled by Z set.
28422 This arrangement allows for the boolean result to be used directly
28423 in a subsequent branch, post optimization. For Thumb-1 targets, the
28424 boolean negation of the result is also stored in bval because the Thumb-1
28425 backend lacks dependency tracking for the CC flag, as flag-setting is not
28426 represented at the RTL level. */
28427 if (TARGET_THUMB1)
28428 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28429 else
28430 {
28431 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28432 emit_insn (gen_rtx_SET (bval, x));
28433 }
28434 }
28435
28436 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28437 another memory store between the load-exclusive and store-exclusive can
28438 reset the monitor from Exclusive to Open state. This means we must wait
28439 until after reload to split the pattern, lest we get a register spill in
28440 the middle of the atomic sequence. Success of the compare and swap is
28441 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28442 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28443 atomic_compare_and_swapmode standard pattern in operand 0). */
28444
28445 void
28446 arm_split_compare_and_swap (rtx operands[])
28447 {
28448 rtx rval, mem, oldval, newval, neg_bval;
28449 machine_mode mode;
28450 enum memmodel mod_s, mod_f;
28451 bool is_weak;
28452 rtx_code_label *label1, *label2;
28453 rtx x, cond;
28454
28455 rval = operands[1];
28456 mem = operands[2];
28457 oldval = operands[3];
28458 newval = operands[4];
28459 is_weak = (operands[5] != const0_rtx);
28460 mod_s = memmodel_from_int (INTVAL (operands[6]));
28461 mod_f = memmodel_from_int (INTVAL (operands[7]));
28462 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28463 mode = GET_MODE (mem);
28464
28465 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28466
28467 bool use_acquire = TARGET_HAVE_LDACQ
28468 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28469 || is_mm_release (mod_s));
28470
28471 bool use_release = TARGET_HAVE_LDACQ
28472 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28473 || is_mm_acquire (mod_s));
28474
28475 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28476 a full barrier is emitted after the store-release. */
28477 if (is_armv8_sync)
28478 use_acquire = false;
28479
28480 /* Checks whether a barrier is needed and emits one accordingly. */
28481 if (!(use_acquire || use_release))
28482 arm_pre_atomic_barrier (mod_s);
28483
28484 label1 = NULL;
28485 if (!is_weak)
28486 {
28487 label1 = gen_label_rtx ();
28488 emit_label (label1);
28489 }
28490 label2 = gen_label_rtx ();
28491
28492 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28493
28494 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28495 as required to communicate with arm_expand_compare_and_swap. */
28496 if (TARGET_32BIT)
28497 {
28498 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28499 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28500 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28501 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28502 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28503 }
28504 else
28505 {
28506 emit_move_insn (neg_bval, const1_rtx);
28507 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28508 if (thumb1_cmpneg_operand (oldval, SImode))
28509 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28510 label2, cond));
28511 else
28512 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28513 }
28514
28515 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28516
28517 /* Weak or strong, we want EQ to be true for success, so that we
28518 match the flags that we got from the compare above. */
28519 if (TARGET_32BIT)
28520 {
28521 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28522 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28523 emit_insn (gen_rtx_SET (cond, x));
28524 }
28525
28526 if (!is_weak)
28527 {
28528 /* Z is set to boolean value of !neg_bval, as required to communicate
28529 with arm_expand_compare_and_swap. */
28530 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28531 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28532 }
28533
28534 if (!is_mm_relaxed (mod_f))
28535 emit_label (label2);
28536
28537 /* Checks whether a barrier is needed and emits one accordingly. */
28538 if (is_armv8_sync
28539 || !(use_acquire || use_release))
28540 arm_post_atomic_barrier (mod_s);
28541
28542 if (is_mm_relaxed (mod_f))
28543 emit_label (label2);
28544 }
28545
28546 /* Split an atomic operation pattern. Operation is given by CODE and is one
28547 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28548 operation). Operation is performed on the content at MEM and on VALUE
28549 following the memory model MODEL_RTX. The content at MEM before and after
28550 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28551 success of the operation is returned in COND. Using a scratch register or
28552 an operand register for these determines what result is returned for that
28553 pattern. */
28554
28555 void
28556 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28557 rtx value, rtx model_rtx, rtx cond)
28558 {
28559 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28560 machine_mode mode = GET_MODE (mem);
28561 machine_mode wmode = (mode == DImode ? DImode : SImode);
28562 rtx_code_label *label;
28563 bool all_low_regs, bind_old_new;
28564 rtx x;
28565
28566 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28567
28568 bool use_acquire = TARGET_HAVE_LDACQ
28569 && !(is_mm_relaxed (model) || is_mm_consume (model)
28570 || is_mm_release (model));
28571
28572 bool use_release = TARGET_HAVE_LDACQ
28573 && !(is_mm_relaxed (model) || is_mm_consume (model)
28574 || is_mm_acquire (model));
28575
28576 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28577 a full barrier is emitted after the store-release. */
28578 if (is_armv8_sync)
28579 use_acquire = false;
28580
28581 /* Checks whether a barrier is needed and emits one accordingly. */
28582 if (!(use_acquire || use_release))
28583 arm_pre_atomic_barrier (model);
28584
28585 label = gen_label_rtx ();
28586 emit_label (label);
28587
28588 if (new_out)
28589 new_out = gen_lowpart (wmode, new_out);
28590 if (old_out)
28591 old_out = gen_lowpart (wmode, old_out);
28592 else
28593 old_out = new_out;
28594 value = simplify_gen_subreg (wmode, value, mode, 0);
28595
28596 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28597
28598 /* Does the operation require the destination and the first operand to use
28599 the same register? This is decided by the register constraints of the
28600 relevant insn patterns in thumb1.md. */
28601 gcc_assert (!new_out || REG_P (new_out));
28602 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28603 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28604 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28605 bind_old_new =
28606 (TARGET_THUMB1
28607 && code != SET
28608 && code != MINUS
28609 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28610
28611 /* We want to return the old value while putting the result of the operation
28612 in the same register as the old value so copy the old value over to the
28613 destination register and use that register for the operation. */
28614 if (old_out && bind_old_new)
28615 {
28616 emit_move_insn (new_out, old_out);
28617 old_out = new_out;
28618 }
28619
28620 switch (code)
28621 {
28622 case SET:
28623 new_out = value;
28624 break;
28625
28626 case NOT:
28627 x = gen_rtx_AND (wmode, old_out, value);
28628 emit_insn (gen_rtx_SET (new_out, x));
28629 x = gen_rtx_NOT (wmode, new_out);
28630 emit_insn (gen_rtx_SET (new_out, x));
28631 break;
28632
28633 case MINUS:
28634 if (CONST_INT_P (value))
28635 {
28636 value = GEN_INT (-INTVAL (value));
28637 code = PLUS;
28638 }
28639 /* FALLTHRU */
28640
28641 case PLUS:
28642 if (mode == DImode)
28643 {
28644 /* DImode plus/minus need to clobber flags. */
28645 /* The adddi3 and subdi3 patterns are incorrectly written so that
28646 they require matching operands, even when we could easily support
28647 three operands. Thankfully, this can be fixed up post-splitting,
28648 as the individual add+adc patterns do accept three operands and
28649 post-reload cprop can make these moves go away. */
28650 emit_move_insn (new_out, old_out);
28651 if (code == PLUS)
28652 x = gen_adddi3 (new_out, new_out, value);
28653 else
28654 x = gen_subdi3 (new_out, new_out, value);
28655 emit_insn (x);
28656 break;
28657 }
28658 /* FALLTHRU */
28659
28660 default:
28661 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28662 emit_insn (gen_rtx_SET (new_out, x));
28663 break;
28664 }
28665
28666 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28667 use_release);
28668
28669 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28670 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28671
28672 /* Checks whether a barrier is needed and emits one accordingly. */
28673 if (is_armv8_sync
28674 || !(use_acquire || use_release))
28675 arm_post_atomic_barrier (model);
28676 }
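/* Illustrative sketch of what the split above produces for an SImode atomic
   add when acquire/release forms are not used (register numbers chosen
   arbitrarily, barriers elided):

	1:	ldrex	r0, [r2]	@ old_out = *mem
		add	r1, r0, r3	@ new_out = old_out + value
		strex	ip, r1, [r2]	@ ip = 0 iff the store succeeded
		cmp	ip, #0
		bne	1b

   With acquire/release support the plain exclusives become their
   LDAEX/STLEX forms and the explicit barriers are dropped.  */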
28677 \f
28678 #define MAX_VECT_LEN 16
28679
28680 struct expand_vec_perm_d
28681 {
28682 rtx target, op0, op1;
28683 auto_vec_perm_indices perm;
28684 machine_mode vmode;
28685 bool one_vector_p;
28686 bool testing_p;
28687 };
28688
28689 /* Generate a variable permutation. */
28690
28691 static void
28692 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28693 {
28694 machine_mode vmode = GET_MODE (target);
28695 bool one_vector_p = rtx_equal_p (op0, op1);
28696
28697 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28698 gcc_checking_assert (GET_MODE (op0) == vmode);
28699 gcc_checking_assert (GET_MODE (op1) == vmode);
28700 gcc_checking_assert (GET_MODE (sel) == vmode);
28701 gcc_checking_assert (TARGET_NEON);
28702
28703 if (one_vector_p)
28704 {
28705 if (vmode == V8QImode)
28706 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28707 else
28708 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28709 }
28710 else
28711 {
28712 rtx pair;
28713
28714 if (vmode == V8QImode)
28715 {
28716 pair = gen_reg_rtx (V16QImode);
28717 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28718 pair = gen_lowpart (TImode, pair);
28719 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28720 }
28721 else
28722 {
28723 pair = gen_reg_rtx (OImode);
28724 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28725 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28726 }
28727 }
28728 }
28729
28730 void
28731 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28732 {
28733 machine_mode vmode = GET_MODE (target);
28734 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28735 bool one_vector_p = rtx_equal_p (op0, op1);
28736 rtx rmask[MAX_VECT_LEN], mask;
28737
28738 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28739 numbering of elements for big-endian, we must reverse the order. */
28740 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28741
28742 /* The VTBL instruction does not use a modulo index, so we must take care
28743 of that ourselves. */
28744 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28745 for (i = 0; i < nelt; ++i)
28746 rmask[i] = mask;
28747 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28748 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28749
28750 arm_expand_vec_perm_1 (target, op0, op1, sel);
28751 }
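/* For example, with a single V8QImode input each selector byte is ANDed
   with 7 before the VTBL, so an out-of-range index such as 11 selects the
   same lane as 3; with two distinct inputs the mask is 15 instead.  */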
28752
28753 /* Map lane ordering between architectural lane order and GCC lane order,
28754 taking into account ABI. See comment above output_move_neon for details. */
28755
28756 static int
28757 neon_endian_lane_map (machine_mode mode, int lane)
28758 {
28759 if (BYTES_BIG_ENDIAN)
28760 {
28761 int nelems = GET_MODE_NUNITS (mode);
28762 /* Reverse lane order. */
28763 lane = (nelems - 1 - lane);
28764 /* Reverse D register order, to match ABI. */
28765 if (GET_MODE_SIZE (mode) == 16)
28766 lane = lane ^ (nelems / 2);
28767 }
28768 return lane;
28769 }
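/* For example, for V4SImode on a big-endian target the lanes are first
   reversed (0,1,2,3 -> 3,2,1,0) and the XOR with nelems / 2 then swaps
   the two D-register halves, giving the overall mapping 0->1, 1->0,
   2->3, 3->2, i.e. a reversal within each 64-bit half.  */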
28770
28771 /* Some permutations index into pairs of vectors; this is a helper function
28772 to map indexes into those pairs of vectors. */
28773
28774 static int
28775 neon_pair_endian_lane_map (machine_mode mode, int lane)
28776 {
28777 int nelem = GET_MODE_NUNITS (mode);
28778 if (BYTES_BIG_ENDIAN)
28779 lane =
28780 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28781 return lane;
28782 }
28783
28784 /* Generate or test for an insn that supports a constant permutation. */
28785
28786 /* Recognize patterns for the VUZP insns. */
28787
28788 static bool
28789 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28790 {
28791 unsigned int i, odd, mask, nelt = d->perm.length ();
28792 rtx out0, out1, in0, in1;
28793 rtx (*gen)(rtx, rtx, rtx, rtx);
28794 int first_elem;
28795 int swap_nelt;
28796
28797 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28798 return false;
28799
28800 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28801 big-endian pattern on 64-bit vectors, so we correct for that. */
28802 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28803 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28804
28805 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28806
28807 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28808 odd = 0;
28809 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28810 odd = 1;
28811 else
28812 return false;
28813 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28814
28815 for (i = 0; i < nelt; i++)
28816 {
28817 unsigned elt =
28818 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28819 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28820 return false;
28821 }
28822
28823 /* Success! */
28824 if (d->testing_p)
28825 return true;
28826
28827 switch (d->vmode)
28828 {
28829 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28830 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28831 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28832 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28833 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28834 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28835 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28836 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28837 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28838 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28839 default:
28840 gcc_unreachable ();
28841 }
28842
28843 in0 = d->op0;
28844 in1 = d->op1;
28845 if (swap_nelt != 0)
28846 std::swap (in0, in1);
28847
28848 out0 = d->target;
28849 out1 = gen_reg_rtx (d->vmode);
28850 if (odd)
28851 std::swap (out0, out1);
28852
28853 emit_insn (gen (out0, in0, in1, out1));
28854 return true;
28855 }
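/* For instance, on a little-endian target a V8HImode selector of
   { 0, 2, 4, 6, 8, 10, 12, 14 } matches the even (odd == 0) VUZP case
   and { 1, 3, 5, 7, 9, 11, 13, 15 } matches the odd one.  */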
28856
28857 /* Recognize patterns for the VZIP insns. */
28858
28859 static bool
28860 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28861 {
28862 unsigned int i, high, mask, nelt = d->perm.length ();
28863 rtx out0, out1, in0, in1;
28864 rtx (*gen)(rtx, rtx, rtx, rtx);
28865 int first_elem;
28866 bool is_swapped;
28867
28868 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28869 return false;
28870
28871 is_swapped = BYTES_BIG_ENDIAN;
28872
28873 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28874
28875 high = nelt / 2;
28876 if (first_elem == neon_endian_lane_map (d->vmode, high))
28877 ;
28878 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28879 high = 0;
28880 else
28881 return false;
28882 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28883
28884 for (i = 0; i < nelt / 2; i++)
28885 {
28886 unsigned elt =
28887 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28888 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28889 != elt)
28890 return false;
28891 elt =
28892 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28893 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28894 != elt)
28895 return false;
28896 }
28897
28898 /* Success! */
28899 if (d->testing_p)
28900 return true;
28901
28902 switch (d->vmode)
28903 {
28904 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28905 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28906 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28907 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28908 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28909 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28910 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28911 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28912 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28913 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28914 default:
28915 gcc_unreachable ();
28916 }
28917
28918 in0 = d->op0;
28919 in1 = d->op1;
28920 if (is_swapped)
28921 std::swap (in0, in1);
28922
28923 out0 = d->target;
28924 out1 = gen_reg_rtx (d->vmode);
28925 if (high)
28926 std::swap (out0, out1);
28927
28928 emit_insn (gen (out0, in0, in1, out1));
28929 return true;
28930 }
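/* For instance, on a little-endian target a V8HImode selector of
   { 0, 8, 1, 9, 2, 10, 3, 11 } matches the low-half (high == 0) VZIP
   case and { 4, 12, 5, 13, 6, 14, 7, 15 } the high-half one.  */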
28931
28932 /* Recognize patterns for the VREV insns. */
28933
28934 static bool
28935 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28936 {
28937 unsigned int i, j, diff, nelt = d->perm.length ();
28938 rtx (*gen)(rtx, rtx);
28939
28940 if (!d->one_vector_p)
28941 return false;
28942
28943 diff = d->perm[0];
28944 switch (diff)
28945 {
28946 case 7:
28947 switch (d->vmode)
28948 {
28949 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28950 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28951 default:
28952 return false;
28953 }
28954 break;
28955 case 3:
28956 switch (d->vmode)
28957 {
28958 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28959 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28960 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28961 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28962 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28963 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28964 default:
28965 return false;
28966 }
28967 break;
28968 case 1:
28969 switch (d->vmode)
28970 {
28971 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28972 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28973 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28974 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28975 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28976 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28977 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28978 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28979 default:
28980 return false;
28981 }
28982 break;
28983 default:
28984 return false;
28985 }
28986
28987 for (i = 0; i < nelt ; i += diff + 1)
28988 for (j = 0; j <= diff; j += 1)
28989 {
28990 /* This is guaranteed to be true as the value of diff
28991 is 7, 3 or 1 and we should have enough elements in the
28992 queue to generate this. Getting a vector mask with a
28993 value of diff other than these values implies that
28994 something is wrong by the time we get here. */
28995 gcc_assert (i + j < nelt);
28996 if (d->perm[i + j] != i + diff - j)
28997 return false;
28998 }
28999
29000 /* Success! */
29001 if (d->testing_p)
29002 return true;
29003
29004 emit_insn (gen (d->target, d->op0));
29005 return true;
29006 }
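/* For instance, a V8HImode selector of { 3, 2, 1, 0, 7, 6, 5, 4 } has
   diff == 3 and is matched as vrev64.16, reversing the halfwords within
   each 64-bit group; more generally diff + 1 is the number of adjacent
   elements reversed by the chosen VREV variant.  */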
29007
29008 /* Recognize patterns for the VTRN insns. */
29009
29010 static bool
29011 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29012 {
29013 unsigned int i, odd, mask, nelt = d->perm.length ();
29014 rtx out0, out1, in0, in1;
29015 rtx (*gen)(rtx, rtx, rtx, rtx);
29016
29017 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29018 return false;
29019
29020 /* Note that these are little-endian tests. Adjust for big-endian later. */
29021 if (d->perm[0] == 0)
29022 odd = 0;
29023 else if (d->perm[0] == 1)
29024 odd = 1;
29025 else
29026 return false;
29027 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29028
29029 for (i = 0; i < nelt; i += 2)
29030 {
29031 if (d->perm[i] != i + odd)
29032 return false;
29033 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29034 return false;
29035 }
29036
29037 /* Success! */
29038 if (d->testing_p)
29039 return true;
29040
29041 switch (d->vmode)
29042 {
29043 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29044 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29045 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29046 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29047 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29048 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29049 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29050 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29051 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29052 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29053 default:
29054 gcc_unreachable ();
29055 }
29056
29057 in0 = d->op0;
29058 in1 = d->op1;
29059 if (BYTES_BIG_ENDIAN)
29060 {
29061 std::swap (in0, in1);
29062 odd = !odd;
29063 }
29064
29065 out0 = d->target;
29066 out1 = gen_reg_rtx (d->vmode);
29067 if (odd)
29068 std::swap (out0, out1);
29069
29070 emit_insn (gen (out0, in0, in1, out1));
29071 return true;
29072 }
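/* For instance, a V4SImode selector of { 0, 4, 2, 6 } matches the even
   (odd == 0) VTRN case and { 1, 5, 3, 7 } the odd one, interleaving the
   corresponding even or odd lanes of the two inputs.  */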
29073
29074 /* Recognize patterns for the VEXT insns. */
29075
29076 static bool
29077 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29078 {
29079 unsigned int i, nelt = d->perm.length ();
29080 rtx (*gen) (rtx, rtx, rtx, rtx);
29081 rtx offset;
29082
29083 unsigned int location;
29084
29085 unsigned int next = d->perm[0] + 1;
29086
29087 /* TODO: Handle GCC's numbering of elements for big-endian. */
29088 if (BYTES_BIG_ENDIAN)
29089 return false;
29090
29091 /* Check if the extracted indexes are increasing by one. */
29092 for (i = 1; i < nelt; next++, i++)
29093 {
29094 /* If we hit the most significant element of the 2nd vector in
29095 the previous iteration, no need to test further. */
29096 if (next == 2 * nelt)
29097 return false;
29098
29099 /* If we are operating on only one vector, it could be a
29100 rotation. If there are only two elements of size < 64, let
29101 arm_evpc_neon_vrev catch it. */
29102 if (d->one_vector_p && (next == nelt))
29103 {
29104 if ((nelt == 2) && (d->vmode != V2DImode))
29105 return false;
29106 else
29107 next = 0;
29108 }
29109
29110 if (d->perm[i] != next)
29111 return false;
29112 }
29113
29114 location = d->perm[0];
29115
29116 switch (d->vmode)
29117 {
29118 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29119 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29120 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29121 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29122 case E_V2SImode: gen = gen_neon_vextv2si; break;
29123 case E_V4SImode: gen = gen_neon_vextv4si; break;
29124 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29125 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29126 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29127 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29128 case E_V2DImode: gen = gen_neon_vextv2di; break;
29129 default:
29130 return false;
29131 }
29132
29133 /* Success! */
29134 if (d->testing_p)
29135 return true;
29136
29137 offset = GEN_INT (location);
29138 emit_insn (gen (d->target, d->op0, d->op1, offset));
29139 return true;
29140 }
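/* For instance, a V4SImode selector of { 1, 2, 3, 4 } is a consecutive
   run starting at element 1 and is matched as a VEXT with an offset of
   one, taking the last three lanes of the first operand and the first
   lane of the second.  */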
29141
29142 /* The NEON VTBL instruction is a fully variable permutation that's even
29143 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29144 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29145 can do slightly better by expanding this as a constant where we don't
29146 have to apply a mask. */
29147
29148 static bool
29149 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29150 {
29151 rtx rperm[MAX_VECT_LEN], sel;
29152 machine_mode vmode = d->vmode;
29153 unsigned int i, nelt = d->perm.length ();
29154
29155 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29156 numbering of elements for big-endian, we must reverse the order. */
29157 if (BYTES_BIG_ENDIAN)
29158 return false;
29159
29160 if (d->testing_p)
29161 return true;
29162
29163 /* Generic code will try constant permutation twice: once with the
29164 original mode and again with the elements lowered to QImode.
29165 So wait and don't do the selector expansion ourselves. */
29166 if (vmode != V8QImode && vmode != V16QImode)
29167 return false;
29168
29169 for (i = 0; i < nelt; ++i)
29170 rperm[i] = GEN_INT (d->perm[i]);
29171 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29172 sel = force_reg (vmode, sel);
29173
29174 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29175 return true;
29176 }
29177
29178 static bool
29179 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29180 {
29181 /* Check if the input mask matches vext before reordering the
29182 operands. */
29183 if (TARGET_NEON)
29184 if (arm_evpc_neon_vext (d))
29185 return true;
29186
29187 /* The pattern matching functions above are written to look for a small
29188 number to begin the sequence (0, 1, N/2). If we begin with an index
29189 from the second operand, we can swap the operands. */
29190 unsigned int nelt = d->perm.length ();
29191 if (d->perm[0] >= nelt)
29192 {
29193 for (unsigned int i = 0; i < nelt; ++i)
29194 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29195
29196 std::swap (d->op0, d->op1);
29197 }
29198
29199 if (TARGET_NEON)
29200 {
29201 if (arm_evpc_neon_vuzp (d))
29202 return true;
29203 if (arm_evpc_neon_vzip (d))
29204 return true;
29205 if (arm_evpc_neon_vrev (d))
29206 return true;
29207 if (arm_evpc_neon_vtrn (d))
29208 return true;
29209 return arm_evpc_neon_vtbl (d);
29210 }
29211 return false;
29212 }
29213
29214 /* Expand a vec_perm_const pattern. */
29215
29216 bool
29217 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29218 {
29219 struct expand_vec_perm_d d;
29220 int i, nelt, which;
29221
29222 d.target = target;
29223 d.op0 = op0;
29224 d.op1 = op1;
29225
29226 d.vmode = GET_MODE (target);
29227 gcc_assert (VECTOR_MODE_P (d.vmode));
29228 d.testing_p = false;
29229
29230 nelt = GET_MODE_NUNITS (d.vmode);
29231 d.perm.reserve (nelt);
29232 for (i = which = 0; i < nelt; ++i)
29233 {
29234 rtx e = XVECEXP (sel, 0, i);
29235 int ei = INTVAL (e) & (2 * nelt - 1);
29236 which |= (ei < nelt ? 1 : 2);
29237 d.perm.quick_push (ei);
29238 }
29239
29240 switch (which)
29241 {
29242 default:
29243 gcc_unreachable();
29244
29245 case 3:
29246 d.one_vector_p = false;
29247 if (!rtx_equal_p (op0, op1))
29248 break;
29249
29250 /* The elements of PERM do not suggest that only the first operand
29251 is used, but both operands are identical. Allow easier matching
29252 of the permutation by folding the permutation into the single
29253 input vector. */
29254 /* FALLTHRU */
29255 case 2:
29256 for (i = 0; i < nelt; ++i)
29257 d.perm[i] &= nelt - 1;
29258 d.op0 = op1;
29259 d.one_vector_p = true;
29260 break;
29261
29262 case 1:
29263 d.op1 = op0;
29264 d.one_vector_p = true;
29265 break;
29266 }
29267
29268 return arm_expand_vec_perm_const_1 (&d);
29269 }
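/* For example, a selector whose indices all refer to the second input
   (WHICH == 2) is reduced modulo NELT and treated as a single-operand
   permutation of OP1, which lets the simpler one-vector patterns above
   match it.  */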
29270
29271 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29272
29273 static bool
29274 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29275 {
29276 struct expand_vec_perm_d d;
29277 unsigned int i, nelt, which;
29278 bool ret;
29279
29280 d.vmode = vmode;
29281 d.testing_p = true;
29282 d.perm.safe_splice (sel);
29283
29284 /* Categorize the set of elements in the selector. */
29285 nelt = GET_MODE_NUNITS (d.vmode);
29286 for (i = which = 0; i < nelt; ++i)
29287 {
29288 unsigned int e = d.perm[i];
29289 gcc_assert (e < 2 * nelt);
29290 which |= (e < nelt ? 1 : 2);
29291 }
29292
29293 /* If all elements are from the second vector, fold them onto the first. */
29294 if (which == 2)
29295 for (i = 0; i < nelt; ++i)
29296 d.perm[i] -= nelt;
29297
29298 /* Check whether the mask can be applied to the vector type. */
29299 d.one_vector_p = (which != 3);
29300
29301 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29302 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29303 if (!d.one_vector_p)
29304 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29305
29306 start_sequence ();
29307 ret = arm_expand_vec_perm_const_1 (&d);
29308 end_sequence ();
29309
29310 return ret;
29311 }
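/* Note that since TESTING_P is set the recognizers above return before
   emitting anything; the start_sequence/end_sequence pair additionally
   ensures that nothing emitted while testing reaches the insn stream.  */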
29312
29313 bool
29314 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29315 {
29316 /* If we are soft float and either have ldrd or the access fits in a
29317 single word, then all auto increment forms are ok. */
29318 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29319 return true;
29320
29321 switch (code)
29322 {
29323 /* Post-increment and pre-decrement are supported for all
29324 instruction forms except for vector forms. */
29325 case ARM_POST_INC:
29326 case ARM_PRE_DEC:
29327 if (VECTOR_MODE_P (mode))
29328 {
29329 if (code != ARM_PRE_DEC)
29330 return true;
29331 else
29332 return false;
29333 }
29334
29335 return true;
29336
29337 case ARM_POST_DEC:
29338 case ARM_PRE_INC:
29339 /* Without LDRD, if the mode size is greater than the
29340 word size there is no point in auto-incrementing
29341 because ldm and stm will not have these forms. */
29342 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29343 return false;
29344
29345 /* Vector and floating point modes do not support
29346 these auto increment forms. */
29347 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29348 return false;
29349
29350 return true;
29351
29352 default:
29353 return false;
29354
29355 }
29356
29357 return false;
29358 }
29359
29360 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29361 on ARM, since we know that shifts by negative amounts are no-ops.
29362 Additionally, the default expansion code is not available or suitable
29363 for post-reload insn splits (this can occur when the register allocator
29364 chooses not to do a shift in NEON).
29365
29366 This function is used in both initial expand and post-reload splits, and
29367 handles all kinds of 64-bit shifts.
29368
29369 Input requirements:
29370 - It is safe for the input and output to be the same register, but
29371 early-clobber rules apply for the shift amount and scratch registers.
29372 - Shift by register requires both scratch registers. In all other cases
29373 the scratch registers may be NULL.
29374 - Ashiftrt by a register also clobbers the CC register. */
29375 void
29376 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29377 rtx amount, rtx scratch1, rtx scratch2)
29378 {
29379 rtx out_high = gen_highpart (SImode, out);
29380 rtx out_low = gen_lowpart (SImode, out);
29381 rtx in_high = gen_highpart (SImode, in);
29382 rtx in_low = gen_lowpart (SImode, in);
29383
29384 /* Terminology:
29385 in = the register pair containing the input value.
29386 out = the destination register pair.
29387 up = the high- or low-part of each pair.
29388 down = the opposite part to "up".
29389 In a shift, we can consider bits to shift from "up"-stream to
29390 "down"-stream, so in a left-shift "up" is the low-part and "down"
29391 is the high-part of each register pair. */
29392
29393 rtx out_up = code == ASHIFT ? out_low : out_high;
29394 rtx out_down = code == ASHIFT ? out_high : out_low;
29395 rtx in_up = code == ASHIFT ? in_low : in_high;
29396 rtx in_down = code == ASHIFT ? in_high : in_low;
29397
29398 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29399 gcc_assert (out
29400 && (REG_P (out) || GET_CODE (out) == SUBREG)
29401 && GET_MODE (out) == DImode);
29402 gcc_assert (in
29403 && (REG_P (in) || GET_CODE (in) == SUBREG)
29404 && GET_MODE (in) == DImode);
29405 gcc_assert (amount
29406 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29407 && GET_MODE (amount) == SImode)
29408 || CONST_INT_P (amount)));
29409 gcc_assert (scratch1 == NULL
29410 || (GET_CODE (scratch1) == SCRATCH)
29411 || (GET_MODE (scratch1) == SImode
29412 && REG_P (scratch1)));
29413 gcc_assert (scratch2 == NULL
29414 || (GET_CODE (scratch2) == SCRATCH)
29415 || (GET_MODE (scratch2) == SImode
29416 && REG_P (scratch2)));
29417 gcc_assert (!REG_P (out) || !REG_P (amount)
29418 || !HARD_REGISTER_P (out)
29419 || (REGNO (out) != REGNO (amount)
29420 && REGNO (out) + 1 != REGNO (amount)));
29421
29422 /* Macros to make following code more readable. */
29423 #define SUB_32(DEST,SRC) \
29424 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29425 #define RSB_32(DEST,SRC) \
29426 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29427 #define SUB_S_32(DEST,SRC) \
29428 gen_addsi3_compare0 ((DEST), (SRC), \
29429 GEN_INT (-32))
29430 #define SET(DEST,SRC) \
29431 gen_rtx_SET ((DEST), (SRC))
29432 #define SHIFT(CODE,SRC,AMOUNT) \
29433 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29434 #define LSHIFT(CODE,SRC,AMOUNT) \
29435 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29436 SImode, (SRC), (AMOUNT))
29437 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29438 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29439 SImode, (SRC), (AMOUNT))
29440 #define ORR(A,B) \
29441 gen_rtx_IOR (SImode, (A), (B))
29442 #define BRANCH(COND,LABEL) \
29443 gen_arm_cond_branch ((LABEL), \
29444 gen_rtx_ ## COND (CCmode, cc_reg, \
29445 const0_rtx), \
29446 cc_reg)
29447
29448 /* Shifts by register and shifts by constant are handled separately. */
29449 if (CONST_INT_P (amount))
29450 {
29451 /* We have a shift-by-constant. */
29452
29453 /* First, handle out-of-range shift amounts.
29454 In both cases we try to match the result that an ARM instruction in a
29455 shift-by-register would give. This helps reduce execution
29456 differences between optimization levels, but it won't stop other
29457 parts of the compiler doing different things. This is "undefined
29458 behavior", in any case. */
29459 if (INTVAL (amount) <= 0)
29460 emit_insn (gen_movdi (out, in));
29461 else if (INTVAL (amount) >= 64)
29462 {
29463 if (code == ASHIFTRT)
29464 {
29465 rtx const31_rtx = GEN_INT (31);
29466 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29467 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29468 }
29469 else
29470 emit_insn (gen_movdi (out, const0_rtx));
29471 }
29472
29473 /* Now handle valid shifts. */
29474 else if (INTVAL (amount) < 32)
29475 {
29476 /* Shifts by a constant less than 32. */
29477 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29478
29479 /* Clearing the out register in DImode first avoids lots
29480 of spilling and results in less stack usage.
29481 Later this redundant insn is completely removed.
29482 Do that only if "in" and "out" are different registers. */
29483 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29484 emit_insn (SET (out, const0_rtx));
29485 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29486 emit_insn (SET (out_down,
29487 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29488 out_down)));
29489 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29490 }
29491 else
29492 {
29493 /* Shifts by a constant greater than 31. */
29494 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29495
29496 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29497 emit_insn (SET (out, const0_rtx));
29498 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29499 if (code == ASHIFTRT)
29500 emit_insn (gen_ashrsi3 (out_up, in_up,
29501 GEN_INT (31)));
29502 else
29503 emit_insn (SET (out_up, const0_rtx));
29504 }
29505 }
29506 else
29507 {
29508 /* We have a shift-by-register. */
29509 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29510
29511 /* This alternative requires the scratch registers. */
29512 gcc_assert (scratch1 && REG_P (scratch1));
29513 gcc_assert (scratch2 && REG_P (scratch2));
29514
29515 /* We will need the values "amount-32" and "32-amount" later.
29516 Swapping them around now allows the later code to be more general. */
29517 switch (code)
29518 {
29519 case ASHIFT:
29520 emit_insn (SUB_32 (scratch1, amount));
29521 emit_insn (RSB_32 (scratch2, amount));
29522 break;
29523 case ASHIFTRT:
29524 emit_insn (RSB_32 (scratch1, amount));
29525 /* Also set CC = amount > 32. */
29526 emit_insn (SUB_S_32 (scratch2, amount));
29527 break;
29528 case LSHIFTRT:
29529 emit_insn (RSB_32 (scratch1, amount));
29530 emit_insn (SUB_32 (scratch2, amount));
29531 break;
29532 default:
29533 gcc_unreachable ();
29534 }
29535
29536 /* Emit code like this:
29537
29538 arithmetic-left:
29539 out_down = in_down << amount;
29540 out_down = (in_up << (amount - 32)) | out_down;
29541 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29542 out_up = in_up << amount;
29543
29544 arithmetic-right:
29545 out_down = in_down >> amount;
29546 out_down = (in_up << (32 - amount)) | out_down;
29547 if (amount < 32)
29548 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29549 out_up = in_up << amount;
29550
29551 logical-right:
29552 out_down = in_down >> amount;
29553 out_down = (in_up << (32 - amount)) | out_down;
29554 if (amount < 32)
29555 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29556 out_up = in_up << amount;
29557
29558 The ARM and Thumb2 variants are the same but implemented slightly
29559 differently. If this were only called during expand we could just
29560 use the Thumb2 case and let combine do the right thing, but this
29561 can also be called from post-reload splitters. */
29562
29563 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29564
29565 if (!TARGET_THUMB2)
29566 {
29567 /* Emit code for ARM mode. */
29568 emit_insn (SET (out_down,
29569 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29570 if (code == ASHIFTRT)
29571 {
29572 rtx_code_label *done_label = gen_label_rtx ();
29573 emit_jump_insn (BRANCH (LT, done_label));
29574 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29575 out_down)));
29576 emit_label (done_label);
29577 }
29578 else
29579 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29580 out_down)));
29581 }
29582 else
29583 {
29584 /* Emit code for Thumb2 mode.
29585 Thumb2 can't do shift and or in one insn. */
29586 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29587 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29588
29589 if (code == ASHIFTRT)
29590 {
29591 rtx_code_label *done_label = gen_label_rtx ();
29592 emit_jump_insn (BRANCH (LT, done_label));
29593 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29594 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29595 emit_label (done_label);
29596 }
29597 else
29598 {
29599 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29600 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29601 }
29602 }
29603
29604 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29605 }
29606
29607 #undef SUB_32
29608 #undef RSB_32
29609 #undef SUB_S_32
29610 #undef SET
29611 #undef SHIFT
29612 #undef LSHIFT
29613 #undef REV_LSHIFT
29614 #undef ORR
29615 #undef BRANCH
29616 }
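/* For example, a 64-bit logical right shift by the constant 8 becomes
   out_down = (in_down >> 8) | (in_up << 24); out_up = in_up >> 8,
   using only the two core registers of each pair.  */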
29617
29618 /* Returns true if the pattern is a valid symbolic address, which is either a
29619 symbol_ref or (symbol_ref + addend).
29620
29621 According to the ARM ELF ABI, the initial addend of REL-type relocations
29622 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29623 literal field of the instruction as a 16-bit signed value in the range
29624 -32768 <= A < 32768. */
29625
29626 bool
29627 arm_valid_symbolic_address_p (rtx addr)
29628 {
29629 rtx xop0, xop1 = NULL_RTX;
29630 rtx tmp = addr;
29631
29632 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29633 return true;
29634
29635 /* (const (plus: symbol_ref const_int)) */
29636 if (GET_CODE (addr) == CONST)
29637 tmp = XEXP (addr, 0);
29638
29639 if (GET_CODE (tmp) == PLUS)
29640 {
29641 xop0 = XEXP (tmp, 0);
29642 xop1 = XEXP (tmp, 1);
29643
29644 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29645 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29646 }
29647
29648 return false;
29649 }
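/* For example, (const (plus (symbol_ref "foo") (const_int 100))) is
   accepted, while an addend of 0x10000 is rejected because it cannot be
   encoded in the signed 16-bit literal field described above.  */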
29650
29651 /* Returns true if COMPARISON is a valid comparison operation and puts
29652 the operands into a form that is valid. */
29653 bool
29654 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29655 {
29656 enum rtx_code code = GET_CODE (*comparison);
29657 int code_int;
29658 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29659 ? GET_MODE (*op2) : GET_MODE (*op1);
29660
29661 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29662
29663 if (code == UNEQ || code == LTGT)
29664 return false;
29665
29666 code_int = (int)code;
29667 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29668 PUT_CODE (*comparison, (enum rtx_code)code_int);
29669
29670 switch (mode)
29671 {
29672 case E_SImode:
29673 if (!arm_add_operand (*op1, mode))
29674 *op1 = force_reg (mode, *op1);
29675 if (!arm_add_operand (*op2, mode))
29676 *op2 = force_reg (mode, *op2);
29677 return true;
29678
29679 case E_DImode:
29680 if (!cmpdi_operand (*op1, mode))
29681 *op1 = force_reg (mode, *op1);
29682 if (!cmpdi_operand (*op2, mode))
29683 *op2 = force_reg (mode, *op2);
29684 return true;
29685
29686 case E_HFmode:
29687 if (!TARGET_VFP_FP16INST)
29688 break;
29689 /* FP16 comparisons are done in SF mode. */
29690 mode = SFmode;
29691 *op1 = convert_to_mode (mode, *op1, 1);
29692 *op2 = convert_to_mode (mode, *op2, 1);
29693 /* Fall through. */
29694 case E_SFmode:
29695 case E_DFmode:
29696 if (!vfp_compare_operand (*op1, mode))
29697 *op1 = force_reg (mode, *op1);
29698 if (!vfp_compare_operand (*op2, mode))
29699 *op2 = force_reg (mode, *op2);
29700 return true;
29701 default:
29702 break;
29703 }
29704
29705 return false;
29706
29707 }
29708
29709 /* Maximum number of instructions to set a block of memory. */
29710 static int
29711 arm_block_set_max_insns (void)
29712 {
29713 if (optimize_function_for_size_p (cfun))
29714 return 4;
29715 else
29716 return current_tune->max_insns_inline_memset;
29717 }
29718
29719 /* Return TRUE if it's profitable to set block of memory for
29720 non-vectorized case. VAL is the value to set the memory
29721 with. LENGTH is the number of bytes to set. ALIGN is the
29722 alignment of the destination memory in bytes. UNALIGNED_P
29723 is TRUE if we can only set the memory with instructions
29724 meeting alignment requirements. USE_STRD_P is TRUE if we
29725 can use strd to set the memory. */
29726 static bool
29727 arm_block_set_non_vect_profit_p (rtx val,
29728 unsigned HOST_WIDE_INT length,
29729 unsigned HOST_WIDE_INT align,
29730 bool unaligned_p, bool use_strd_p)
29731 {
29732 int num = 0;
29733 /* For leftovers of 0-7 bytes, we can set the memory block using
29734 strb/strh/str with the minimum number of instructions. */
29735 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29736
29737 if (unaligned_p)
29738 {
29739 num = arm_const_inline_cost (SET, val);
29740 num += length / align + length % align;
29741 }
29742 else if (use_strd_p)
29743 {
29744 num = arm_const_double_inline_cost (val);
29745 num += (length >> 3) + leftover[length & 7];
29746 }
29747 else
29748 {
29749 num = arm_const_inline_cost (SET, val);
29750 num += (length >> 2) + leftover[length & 3];
29751 }
29752
29753 /* We may be able to combine the last STRH/STRB pair into a single STR
29754 by shifting one byte back. */
29755 if (unaligned_access && length > 3 && (length & 3) == 3)
29756 num--;
29757
29758 return (num <= arm_block_set_max_insns ());
29759 }
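/* For example, for a word-aligned 15-byte block set without strd the
   store count is (15 >> 2) + leftover[15 & 3] == 3 + 2, plus the cost
   of materializing VAL; with unaligned_access the final STRH/STRB pair
   is counted as a single STR, saving one instruction.  */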
29760
29761 /* Return TRUE if it's profitable to set block of memory for
29762 vectorized case. LENGTH is the number of bytes to set.
29763 ALIGN is the alignment of destination memory in bytes.
29764 MODE is the vector mode used to set the memory. */
29765 static bool
29766 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29767 unsigned HOST_WIDE_INT align,
29768 machine_mode mode)
29769 {
29770 int num;
29771 bool unaligned_p = ((align & 3) != 0);
29772 unsigned int nelt = GET_MODE_NUNITS (mode);
29773
29774 /* Instruction loading constant value. */
29775 num = 1;
29776 /* Instructions storing the memory. */
29777 num += (length + nelt - 1) / nelt;
29778 /* Instructions adjusting the address expression. We only need to
29779 adjust the address expression if it's 4-byte aligned and the
29780 leftover bytes can only be stored by a misaligned store instruction. */
29781 if (!unaligned_p && (length & 3) != 0)
29782 num++;
29783
29784 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29785 if (!unaligned_p && mode == V16QImode)
29786 num--;
29787
29788 return (num <= arm_block_set_max_insns ());
29789 }
29790
29791 /* Set a block of memory using vectorization instructions for the
29792 unaligned case. We fill the first LENGTH bytes of the memory
29793 area starting from DSTBASE with byte constant VALUE. ALIGN is
29794 the alignment requirement of memory. Return TRUE if succeeded. */
29795 static bool
29796 arm_block_set_unaligned_vect (rtx dstbase,
29797 unsigned HOST_WIDE_INT length,
29798 unsigned HOST_WIDE_INT value,
29799 unsigned HOST_WIDE_INT align)
29800 {
29801 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29802 rtx dst, mem;
29803 rtx val_elt, val_vec, reg;
29804 rtx rval[MAX_VECT_LEN];
29805 rtx (*gen_func) (rtx, rtx);
29806 machine_mode mode;
29807 unsigned HOST_WIDE_INT v = value;
29808 unsigned int offset = 0;
29809 gcc_assert ((align & 0x3) != 0);
29810 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29811 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29812 if (length >= nelt_v16)
29813 {
29814 mode = V16QImode;
29815 gen_func = gen_movmisalignv16qi;
29816 }
29817 else
29818 {
29819 mode = V8QImode;
29820 gen_func = gen_movmisalignv8qi;
29821 }
29822 nelt_mode = GET_MODE_NUNITS (mode);
29823 gcc_assert (length >= nelt_mode);
29824 /* Skip if it isn't profitable. */
29825 if (!arm_block_set_vect_profit_p (length, align, mode))
29826 return false;
29827
29828 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29829 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29830
29831 v = sext_hwi (v, BITS_PER_WORD);
29832 val_elt = GEN_INT (v);
29833 for (j = 0; j < nelt_mode; j++)
29834 rval[j] = val_elt;
29835
29836 reg = gen_reg_rtx (mode);
29837 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29838 /* Emit instruction loading the constant value. */
29839 emit_move_insn (reg, val_vec);
29840
29841 /* Handle nelt_mode bytes in a vector. */
29842 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29843 {
29844 emit_insn ((*gen_func) (mem, reg));
29845 if (i + 2 * nelt_mode <= length)
29846 {
29847 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29848 offset += nelt_mode;
29849 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29850 }
29851 }
29852
29853 /* If at least nelt_v8 bytes are left over, we must be in
29854 V16QI mode. */
29855 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29856
29857 /* Handle (8, 16) bytes leftover. */
29858 if (i + nelt_v8 < length)
29859 {
29860 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29861 offset += length - i;
29862 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29863
29864 /* We are shifting bytes back, set the alignment accordingly. */
29865 if ((length & 1) != 0 && align >= 2)
29866 set_mem_align (mem, BITS_PER_UNIT);
29867
29868 emit_insn (gen_movmisalignv16qi (mem, reg));
29869 }
29870 /* Handle (0, 8] bytes leftover. */
29871 else if (i < length && i + nelt_v8 >= length)
29872 {
29873 if (mode == V16QImode)
29874 reg = gen_lowpart (V8QImode, reg);
29875
29876 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29877 + (nelt_mode - nelt_v8))));
29878 offset += (length - i) + (nelt_mode - nelt_v8);
29879 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29880
29881 /* We are shifting bytes back, set the alignment accordingly. */
29882 if ((length & 1) != 0 && align >= 2)
29883 set_mem_align (mem, BITS_PER_UNIT);
29884
29885 emit_insn (gen_movmisalignv8qi (mem, reg));
29886 }
29887
29888 return true;
29889 }
29890
29891 /* Set a block of memory using vectorization instructions for the
29892 aligned case. We fill the first LENGTH bytes of the memory area
29893 starting from DSTBASE with byte constant VALUE. ALIGN is the
29894 alignment requirement of memory. Return TRUE if succeeded. */
29895 static bool
29896 arm_block_set_aligned_vect (rtx dstbase,
29897 unsigned HOST_WIDE_INT length,
29898 unsigned HOST_WIDE_INT value,
29899 unsigned HOST_WIDE_INT align)
29900 {
29901 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29902 rtx dst, addr, mem;
29903 rtx val_elt, val_vec, reg;
29904 rtx rval[MAX_VECT_LEN];
29905 machine_mode mode;
29906 unsigned HOST_WIDE_INT v = value;
29907 unsigned int offset = 0;
29908
29909 gcc_assert ((align & 0x3) == 0);
29910 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29911 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29912 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29913 mode = V16QImode;
29914 else
29915 mode = V8QImode;
29916
29917 nelt_mode = GET_MODE_NUNITS (mode);
29918 gcc_assert (length >= nelt_mode);
29919 /* Skip if it isn't profitable. */
29920 if (!arm_block_set_vect_profit_p (length, align, mode))
29921 return false;
29922
29923 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29924
29925 v = sext_hwi (v, BITS_PER_WORD);
29926 val_elt = GEN_INT (v);
29927 for (j = 0; j < nelt_mode; j++)
29928 rval[j] = val_elt;
29929
29930 reg = gen_reg_rtx (mode);
29931 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29932 /* Emit instruction loading the constant value. */
29933 emit_move_insn (reg, val_vec);
29934
29935 i = 0;
29936 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29937 if (mode == V16QImode)
29938 {
29939 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29940 emit_insn (gen_movmisalignv16qi (mem, reg));
29941 i += nelt_mode;
29942 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29943 if (i + nelt_v8 < length && i + nelt_v16 > length)
29944 {
29945 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29946 offset += length - nelt_mode;
29947 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29948 /* We are shifting bytes back, set the alignment accordingly. */
29949 if ((length & 0x3) == 0)
29950 set_mem_align (mem, BITS_PER_UNIT * 4);
29951 else if ((length & 0x1) == 0)
29952 set_mem_align (mem, BITS_PER_UNIT * 2);
29953 else
29954 set_mem_align (mem, BITS_PER_UNIT);
29955
29956 emit_insn (gen_movmisalignv16qi (mem, reg));
29957 return true;
29958 }
29959 /* Fall through for bytes leftover. */
29960 mode = V8QImode;
29961 nelt_mode = GET_MODE_NUNITS (mode);
29962 reg = gen_lowpart (V8QImode, reg);
29963 }
29964
29965 /* Handle 8 bytes in a vector. */
29966 for (; (i + nelt_mode <= length); i += nelt_mode)
29967 {
29968 addr = plus_constant (Pmode, dst, i);
29969 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29970 emit_move_insn (mem, reg);
29971 }
29972
29973 /* Handle single word leftover by shifting 4 bytes back. We can
29974 use aligned access for this case. */
29975 if (i + UNITS_PER_WORD == length)
29976 {
29977 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29978 offset += i - UNITS_PER_WORD;
29979 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29980 /* We are shifting 4 bytes back, set the alignment accordingly. */
29981 if (align > UNITS_PER_WORD)
29982 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29983
29984 emit_move_insn (mem, reg);
29985 }
29986 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29987 We have to use unaligned access for this case. */
29988 else if (i < length)
29989 {
29990 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29991 offset += length - nelt_mode;
29992 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29993 /* We are shifting bytes back, set the alignment accordingly. */
29994 if ((length & 1) == 0)
29995 set_mem_align (mem, BITS_PER_UNIT * 2);
29996 else
29997 set_mem_align (mem, BITS_PER_UNIT);
29998
29999 emit_insn (gen_movmisalignv8qi (mem, reg));
30000 }
30001
30002 return true;
30003 }
30004
30005 /* Set a block of memory using plain strh/strb instructions, only
30006 using instructions allowed by ALIGN on the processor. We fill the
30007 first LENGTH bytes of the memory area starting from DSTBASE
30008 with byte constant VALUE. ALIGN is the alignment requirement
30009 of memory. */
30010 static bool
30011 arm_block_set_unaligned_non_vect (rtx dstbase,
30012 unsigned HOST_WIDE_INT length,
30013 unsigned HOST_WIDE_INT value,
30014 unsigned HOST_WIDE_INT align)
30015 {
30016 unsigned int i;
30017 rtx dst, addr, mem;
30018 rtx val_exp, val_reg, reg;
30019 machine_mode mode;
30020 HOST_WIDE_INT v = value;
30021
30022 gcc_assert (align == 1 || align == 2);
30023
30024 if (align == 2)
30025 v |= (value << BITS_PER_UNIT);
30026
30027 v = sext_hwi (v, BITS_PER_WORD);
30028 val_exp = GEN_INT (v);
30029 /* Skip if it isn't profitable. */
30030 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30031 align, true, false))
30032 return false;
30033
30034 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30035 mode = (align == 2 ? HImode : QImode);
30036 val_reg = force_reg (SImode, val_exp);
30037 reg = gen_lowpart (mode, val_reg);
30038
30039 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30040 {
30041 addr = plus_constant (Pmode, dst, i);
30042 mem = adjust_automodify_address (dstbase, mode, addr, i);
30043 emit_move_insn (mem, reg);
30044 }
30045
30046 /* Handle single byte leftover. */
30047 if (i + 1 == length)
30048 {
30049 reg = gen_lowpart (QImode, val_reg);
30050 addr = plus_constant (Pmode, dst, i);
30051 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30052 emit_move_insn (mem, reg);
30053 i++;
30054 }
30055
30056 gcc_assert (i == length);
30057 return true;
30058 }
30059
30060 /* Set a block of memory using plain strd/str/strh/strb instructions,
30061 to permit unaligned copies on processors which support unaligned
30062 semantics for those instructions. We fill the first LENGTH bytes
30063 of the memory area starting from DSTBASE with byte constant VALUE.
30064 ALIGN is the alignment requirement of memory. */
30065 static bool
30066 arm_block_set_aligned_non_vect (rtx dstbase,
30067 unsigned HOST_WIDE_INT length,
30068 unsigned HOST_WIDE_INT value,
30069 unsigned HOST_WIDE_INT align)
30070 {
30071 unsigned int i;
30072 rtx dst, addr, mem;
30073 rtx val_exp, val_reg, reg;
30074 unsigned HOST_WIDE_INT v;
30075 bool use_strd_p;
30076
30077 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30078 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30079
30080 v = (value | (value << 8) | (value << 16) | (value << 24));
30081 if (length < UNITS_PER_WORD)
30082 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30083
30084 if (use_strd_p)
30085 v |= (v << BITS_PER_WORD);
30086 else
30087 v = sext_hwi (v, BITS_PER_WORD);
30088
30089 val_exp = GEN_INT (v);
30090 /* Skip if it isn't profitable. */
30091 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30092 align, false, use_strd_p))
30093 {
30094 if (!use_strd_p)
30095 return false;
30096
30097 /* Try without strd. */
30098 v = (v >> BITS_PER_WORD);
30099 v = sext_hwi (v, BITS_PER_WORD);
30100 val_exp = GEN_INT (v);
30101 use_strd_p = false;
30102 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30103 align, false, use_strd_p))
30104 return false;
30105 }
30106
30107 i = 0;
30108 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30109 /* Handle double words using strd if possible. */
30110 if (use_strd_p)
30111 {
30112 val_reg = force_reg (DImode, val_exp);
30113 reg = val_reg;
30114 for (; (i + 8 <= length); i += 8)
30115 {
30116 addr = plus_constant (Pmode, dst, i);
30117 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30118 emit_move_insn (mem, reg);
30119 }
30120 }
30121 else
30122 val_reg = force_reg (SImode, val_exp);
30123
30124 /* Handle words. */
30125 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30126 for (; (i + 4 <= length); i += 4)
30127 {
30128 addr = plus_constant (Pmode, dst, i);
30129 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30130 if ((align & 3) == 0)
30131 emit_move_insn (mem, reg);
30132 else
30133 emit_insn (gen_unaligned_storesi (mem, reg));
30134 }
30135
30136 /* Merge last pair of STRH and STRB into a STR if possible. */
30137 if (unaligned_access && i > 0 && (i + 3) == length)
30138 {
30139 addr = plus_constant (Pmode, dst, i - 1);
30140 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30141 /* We are shifting one byte back, set the alignment accordingly. */
30142 if ((align & 1) == 0)
30143 set_mem_align (mem, BITS_PER_UNIT);
30144
30145 /* Most likely this is an unaligned access, and we can't tell at
30146 compilation time. */
30147 emit_insn (gen_unaligned_storesi (mem, reg));
30148 return true;
30149 }
30150
30151 /* Handle half word leftover. */
30152 if (i + 2 <= length)
30153 {
30154 reg = gen_lowpart (HImode, val_reg);
30155 addr = plus_constant (Pmode, dst, i);
30156 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30157 if ((align & 1) == 0)
30158 emit_move_insn (mem, reg);
30159 else
30160 emit_insn (gen_unaligned_storehi (mem, reg));
30161
30162 i += 2;
30163 }
30164
30165 /* Handle single byte leftover. */
30166 if (i + 1 == length)
30167 {
30168 reg = gen_lowpart (QImode, val_reg);
30169 addr = plus_constant (Pmode, dst, i);
30170 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30171 emit_move_insn (mem, reg);
30172 }
30173
30174 return true;
30175 }
30176
30177 /* Set a block of memory using vectorization instructions for both
30178 aligned and unaligned cases. We fill the first LENGTH bytes of
30179 the memory area starting from DSTBASE with byte constant VALUE.
30180 ALIGN is the alignment requirement of memory. */
30181 static bool
30182 arm_block_set_vect (rtx dstbase,
30183 unsigned HOST_WIDE_INT length,
30184 unsigned HOST_WIDE_INT value,
30185 unsigned HOST_WIDE_INT align)
30186 {
30187 /* Check whether we need to use unaligned store instruction. */
30188 if (((align & 3) != 0 || (length & 3) != 0)
30189 /* Check whether unaligned store instruction is available. */
30190 && (!unaligned_access || BYTES_BIG_ENDIAN))
30191 return false;
30192
30193 if ((align & 3) == 0)
30194 return arm_block_set_aligned_vect (dstbase, length, value, align);
30195 else
30196 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30197 }
30198
30199 /* Expand string store operation. First we try to do that using
30200 vectorization instructions, then try with ARM unaligned access and
30201 double-word store if profitable. OPERANDS[0] is the destination,
30202 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30203 initialize the memory, OPERANDS[3] is the known alignment of the
30204 destination. */
30205 bool
30206 arm_gen_setmem (rtx *operands)
30207 {
30208 rtx dstbase = operands[0];
30209 unsigned HOST_WIDE_INT length;
30210 unsigned HOST_WIDE_INT value;
30211 unsigned HOST_WIDE_INT align;
30212
30213 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30214 return false;
30215
30216 length = UINTVAL (operands[1]);
30217 if (length > 64)
30218 return false;
30219
30220 value = (UINTVAL (operands[2]) & 0xFF);
30221 align = UINTVAL (operands[3]);
30222 if (TARGET_NEON && length >= 8
30223 && current_tune->string_ops_prefer_neon
30224 && arm_block_set_vect (dstbase, length, value, align))
30225 return true;
30226
30227 if (!unaligned_access && (align & 3) != 0)
30228 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30229
30230 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30231 }
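/* For example, a 16-byte memset of a word-aligned buffer can typically
   be handled by arm_block_set_vect when NEON is available and the
   tuning prefers NEON string operations; otherwise it falls through to
   the word-store sequence in arm_block_set_aligned_non_vect.  */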
30232
30233
30234 static bool
30235 arm_macro_fusion_p (void)
30236 {
30237 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30238 }
30239
30240 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30241 for MOVW / MOVT macro fusion. */
30242
30243 static bool
30244 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30245 {
30246 /* We are trying to fuse
30247 movw imm / movt imm
30248 instructions as a group that gets scheduled together. */
30249
30250 rtx set_dest = SET_DEST (curr_set);
30251
30252 if (GET_MODE (set_dest) != SImode)
30253 return false;
30254
30255 /* We are trying to match:
30256 prev (movw) == (set (reg r0) (const_int imm16))
30257 curr (movt) == (set (zero_extract (reg r0)
30258 (const_int 16)
30259 (const_int 16))
30260 (const_int imm16_1))
30261 or
30262 prev (movw) == (set (reg r1)
30263 (high (symbol_ref ("SYM"))))
30264 curr (movt) == (set (reg r0)
30265 (lo_sum (reg r1)
30266 (symbol_ref ("SYM")))) */
30267
30268 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30269 {
30270 if (CONST_INT_P (SET_SRC (curr_set))
30271 && CONST_INT_P (SET_SRC (prev_set))
30272 && REG_P (XEXP (set_dest, 0))
30273 && REG_P (SET_DEST (prev_set))
30274 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30275 return true;
30276
30277 }
30278 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30279 && REG_P (SET_DEST (curr_set))
30280 && REG_P (SET_DEST (prev_set))
30281 && GET_CODE (SET_SRC (prev_set)) == HIGH
30282 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30283 return true;
30284
30285 return false;
30286 }
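/* In assembly terms this keeps pairs such as "movw r0, #:lower16:sym" /
   "movt r0, #:upper16:sym" back to back, so cores with MOVW/MOVT fusion
   can issue them together.  */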
30287
30288 static bool
30289 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30290 {
30291 rtx prev_set = single_set (prev);
30292 rtx curr_set = single_set (curr);
30293
30294 if (!prev_set
30295 || !curr_set)
30296 return false;
30297
30298 if (any_condjump_p (curr))
30299 return false;
30300
30301 if (!arm_macro_fusion_p ())
30302 return false;
30303
30304 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30305 && aarch_crypto_can_dual_issue (prev, curr))
30306 return true;
30307
30308 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30309 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30310 return true;
30311
30312 return false;
30313 }
30314
30315 /* Return true iff the instruction fusion described by OP is enabled. */
30316 bool
30317 arm_fusion_enabled_p (tune_params::fuse_ops op)
30318 {
30319 return current_tune->fusible_ops & op;
30320 }
30321
30322 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30323 scheduled for speculative execution. Reject the long-running division
30324 and square-root instructions. */
30325
30326 static bool
30327 arm_sched_can_speculate_insn (rtx_insn *insn)
30328 {
30329 switch (get_attr_type (insn))
30330 {
30331 case TYPE_SDIV:
30332 case TYPE_UDIV:
30333 case TYPE_FDIVS:
30334 case TYPE_FDIVD:
30335 case TYPE_FSQRTS:
30336 case TYPE_FSQRTD:
30337 case TYPE_NEON_FP_SQRT_S:
30338 case TYPE_NEON_FP_SQRT_D:
30339 case TYPE_NEON_FP_SQRT_S_Q:
30340 case TYPE_NEON_FP_SQRT_D_Q:
30341 case TYPE_NEON_FP_DIV_S:
30342 case TYPE_NEON_FP_DIV_D:
30343 case TYPE_NEON_FP_DIV_S_Q:
30344 case TYPE_NEON_FP_DIV_D_Q:
30345 return false;
30346 default:
30347 return true;
30348 }
30349 }
30350
30351 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30352
30353 static unsigned HOST_WIDE_INT
30354 arm_asan_shadow_offset (void)
30355 {
30356 return HOST_WIDE_INT_1U << 29;
30357 }
30358
30359
30360 /* This is a temporary fix for PR60655. Ideally we need
30361 to handle most of these cases in the generic part but
30362 currently we reject minus (..) (sym_ref). We try to
30363 ameliorate the case with minus (sym_ref1) (sym_ref2)
30364 where they are in the same section. */
30365
30366 static bool
30367 arm_const_not_ok_for_debug_p (rtx p)
30368 {
30369 tree decl_op0 = NULL;
30370 tree decl_op1 = NULL;
30371
30372 if (GET_CODE (p) == MINUS)
30373 {
30374 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30375 {
30376 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30377 if (decl_op1
30378 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30379 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30380 {
30381 if ((VAR_P (decl_op1)
30382 || TREE_CODE (decl_op1) == CONST_DECL)
30383 && (VAR_P (decl_op0)
30384 || TREE_CODE (decl_op0) == CONST_DECL))
30385 return (get_variable_section (decl_op1, false)
30386 != get_variable_section (decl_op0, false));
30387
30388 if (TREE_CODE (decl_op1) == LABEL_DECL
30389 && TREE_CODE (decl_op0) == LABEL_DECL)
30390 return (DECL_CONTEXT (decl_op1)
30391 != DECL_CONTEXT (decl_op0));
30392 }
30393
30394 return true;
30395 }
30396 }
30397
30398 return false;
30399 }
30400
30401 /* Return TRUE if X is a reference to a value in a constant pool. */
30402 extern bool
30403 arm_is_constant_pool_ref (rtx x)
30404 {
30405 return (MEM_P (x)
30406 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30407 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30408 }
30409
30410 /* Remember the last target of arm_set_current_function. */
30411 static GTY(()) tree arm_previous_fndecl;
30412
30413 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30414
30415 void
30416 save_restore_target_globals (tree new_tree)
30417 {
30418 /* If we have a previous state, use it. */
30419 if (TREE_TARGET_GLOBALS (new_tree))
30420 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30421 else if (new_tree == target_option_default_node)
30422 restore_target_globals (&default_target_globals);
30423 else
30424 {
30425 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30426 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30427 }
30428
30429 arm_option_params_internal ();
30430 }
30431
30432 /* Invalidate arm_previous_fndecl. */
30433
30434 void
30435 arm_reset_previous_fndecl (void)
30436 {
30437 arm_previous_fndecl = NULL_TREE;
30438 }
30439
30440 /* Establish appropriate back-end context for processing the function
30441 FNDECL. The argument might be NULL to indicate processing at top
30442 level, outside of any function scope. */
30443
30444 static void
30445 arm_set_current_function (tree fndecl)
30446 {
30447 if (!fndecl || fndecl == arm_previous_fndecl)
30448 return;
30449
30450 tree old_tree = (arm_previous_fndecl
30451 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30452 : NULL_TREE);
30453
30454 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30455
30456 /* If the current function has no attributes but the previous one did,
30457 use the default node. */
30458 if (! new_tree && old_tree)
30459 new_tree = target_option_default_node;
30460
30461 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30462 the default have been handled by save_restore_target_globals from
30463 arm_pragma_target_parse. */
30464 if (old_tree == new_tree)
30465 return;
30466
30467 arm_previous_fndecl = fndecl;
30468
30469 /* First set the target options. */
30470 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30471
30472 save_restore_target_globals (new_tree);
30473 }
30474
30475 /* Implement TARGET_OPTION_PRINT. */
30476
30477 static void
30478 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30479 {
30480 int flags = ptr->x_target_flags;
30481 const char *fpu_name;
30482
30483 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30484 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30485
30486 fprintf (file, "%*sselected isa %s\n", indent, "",
30487 TARGET_THUMB2_P (flags) ? "thumb2" :
30488 TARGET_THUMB_P (flags) ? "thumb1" :
30489 "arm");
30490
30491 if (ptr->x_arm_arch_string)
30492 fprintf (file, "%*sselected architecture %s\n", indent, "",
30493 ptr->x_arm_arch_string);
30494
30495 if (ptr->x_arm_cpu_string)
30496 fprintf (file, "%*sselected CPU %s\n", indent, "",
30497 ptr->x_arm_cpu_string);
30498
30499 if (ptr->x_arm_tune_string)
30500 fprintf (file, "%*sselected tune %s\n", indent, "",
30501 ptr->x_arm_tune_string);
30502
30503 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30504 }
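/* As a sketch of the output produced above, a function compiled with, say,
   -mthumb -march=armv7-a -mfpu=vfpv3-d16 would be dumped roughly as

     selected isa thumb2
     selected architecture armv7-a
     selected fpu vfpv3-d16

   with the CPU and tune lines only present when the corresponding strings
   were given.  */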
30505
30506 /* Hook to determine if one function can safely inline another. */
30507
30508 static bool
30509 arm_can_inline_p (tree caller, tree callee)
30510 {
30511 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30512 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30513 bool can_inline = true;
30514
30515 struct cl_target_option *caller_opts
30516 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30517 : target_option_default_node);
30518
30519 struct cl_target_option *callee_opts
30520 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30521 : target_option_default_node);
30522
30523 if (callee_opts == caller_opts)
30524 return true;
30525
30526 /* Callee's ISA features should be a subset of the caller's. */
30527 struct arm_build_target caller_target;
30528 struct arm_build_target callee_target;
30529 caller_target.isa = sbitmap_alloc (isa_num_bits);
30530 callee_target.isa = sbitmap_alloc (isa_num_bits);
30531
30532 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30533 false);
30534 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30535 false);
30536 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30537 can_inline = false;
30538
30539 sbitmap_free (caller_target.isa);
30540 sbitmap_free (callee_target.isa);
30541
30542 /* It is OK to inline between different modes.
30543 Functions with mode-specific instructions, e.g. using inline asm,
30544 must be explicitly protected with noinline.  */
30545 return can_inline;
30546 }
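/* Illustrative sketch (hypothetical user code): under the subset rule above,
   a callee built for a plain target can be inlined into a caller carrying
   extra ISA features, but not the other way round:

     __attribute__((target("fpu=neon"))) int uses_neon (int);
     int plain (int x) { return x + 1; }

     __attribute__((target("fpu=neon")))
     int ok (int x) { return plain (x); }      // callee ISA is a subset: OK

     int no (int x) { return uses_neon (x); }  // callee needs more: no inline
*/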
30547
30548 /* Hook to fix function's alignment affected by target attribute. */
30549
30550 static void
30551 arm_relayout_function (tree fndecl)
30552 {
30553 if (DECL_USER_ALIGN (fndecl))
30554 return;
30555
30556 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30557
30558 if (!callee_tree)
30559 callee_tree = target_option_default_node;
30560
30561 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30562 SET_DECL_ALIGN
30563 (fndecl,
30564 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30565 }
30566
30567 /* Inner function to process the attribute((target(...))); it takes an argument
30568 and sets the current options from that argument.  If we have a list,
30569 recursively go over the list.  */
30570
30571 static bool
30572 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30573 {
30574 if (TREE_CODE (args) == TREE_LIST)
30575 {
30576 bool ret = true;
30577
30578 for (; args; args = TREE_CHAIN (args))
30579 if (TREE_VALUE (args)
30580 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30581 ret = false;
30582 return ret;
30583 }
30584
30585 else if (TREE_CODE (args) != STRING_CST)
30586 {
30587 error ("attribute %<target%> argument not a string");
30588 return false;
30589 }
30590
30591 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30592 char *q;
30593
30594 while ((q = strtok (argstr, ",")) != NULL)
30595 {
30596 while (ISSPACE (*q)) ++q;
30597
30598 argstr = NULL;
30599 if (!strncmp (q, "thumb", 5))
30600 opts->x_target_flags |= MASK_THUMB;
30601
30602 else if (!strncmp (q, "arm", 3))
30603 opts->x_target_flags &= ~MASK_THUMB;
30604
30605 else if (!strncmp (q, "fpu=", 4))
30606 {
30607 int fpu_index;
30608 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30609 &fpu_index, CL_TARGET))
30610 {
30611 error ("invalid fpu for attribute(target(\"%s\"))", q);
30612 return false;
30613 }
30614 if (fpu_index == TARGET_FPU_auto)
30615 {
30616 /* This doesn't really make sense until we support
30617 general dynamic selection of the architecture and all
30618 sub-features. */
30619 sorry ("auto fpu selection not currently permitted here");
30620 return false;
30621 }
30622 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30623 }
30624 else
30625 {
30626 error ("attribute(target(\"%s\")) is unknown", q);
30627 return false;
30628 }
30629 }
30630
30631 return true;
30632 }
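/* For illustration, the attribute strings parsed above are comma-separated
   lists built from "thumb", "arm" and "fpu=<name>", e.g. (hypothetical user
   code):

     __attribute__((target("thumb,fpu=vfpv3-d16"))) void f (void);
     __attribute__((target("arm"))) void g (void);

   Any other token is rejected with the "is unknown" error above.  */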
30633
30634 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30635
30636 tree
30637 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30638 struct gcc_options *opts_set)
30639 {
30640 struct cl_target_option cl_opts;
30641
30642 if (!arm_valid_target_attribute_rec (args, opts))
30643 return NULL_TREE;
30644
30645 cl_target_option_save (&cl_opts, opts);
30646 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30647 arm_option_check_internal (opts);
30648 /* Do any overrides, such as global options arch=xxx. */
30649 arm_option_override_internal (opts, opts_set);
30650
30651 return build_target_option_node (opts);
30652 }
30653
30654 static void
30655 add_attribute (const char * mode, tree *attributes)
30656 {
30657 size_t len = strlen (mode);
30658 tree value = build_string (len, mode);
30659
30660 TREE_TYPE (value) = build_array_type (char_type_node,
30661 build_index_type (size_int (len)));
30662
30663 *attributes = tree_cons (get_identifier ("target"),
30664 build_tree_list (NULL_TREE, value),
30665 *attributes);
30666 }
30667
30668 /* For testing only: alternately insert thumb and arm modes on functions.  */
30669
30670 static void
30671 arm_insert_attributes (tree fndecl, tree * attributes)
30672 {
30673 const char *mode;
30674
30675 if (! TARGET_FLIP_THUMB)
30676 return;
30677
30678 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30679 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30680 return;
30681
30682 /* Nested definitions must inherit mode. */
30683 if (current_function_decl)
30684 {
30685 mode = TARGET_THUMB ? "thumb" : "arm";
30686 add_attribute (mode, attributes);
30687 return;
30688 }
30689
30690 /* If there is already a setting don't change it. */
30691 if (lookup_attribute ("target", *attributes) != NULL)
30692 return;
30693
30694 mode = thumb_flipper ? "thumb" : "arm";
30695 add_attribute (mode, attributes);
30696
30697 thumb_flipper = !thumb_flipper;
30698 }
30699
30700 /* Hook to validate attribute((target("string"))). */
30701
30702 static bool
30703 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30704 tree args, int ARG_UNUSED (flags))
30705 {
30706 bool ret = true;
30707 struct gcc_options func_options;
30708 tree cur_tree, new_optimize;
30709 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30710
30711 /* Get the optimization options of the current function. */
30712 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30713
30714 /* If the function changed the optimization levels as well as setting target
30715 options, start with the optimizations specified. */
30716 if (!func_optimize)
30717 func_optimize = optimization_default_node;
30718
30719 /* Init func_options. */
30720 memset (&func_options, 0, sizeof (func_options));
30721 init_options_struct (&func_options, NULL);
30722 lang_hooks.init_options_struct (&func_options);
30723
30724 /* Initialize func_options to the defaults. */
30725 cl_optimization_restore (&func_options,
30726 TREE_OPTIMIZATION (func_optimize));
30727
30728 cl_target_option_restore (&func_options,
30729 TREE_TARGET_OPTION (target_option_default_node));
30730
30731 /* Set func_options flags with new target mode. */
30732 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30733 &global_options_set);
30734
30735 if (cur_tree == NULL_TREE)
30736 ret = false;
30737
30738 new_optimize = build_optimization_node (&func_options);
30739
30740 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30741
30742 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30743
30744 finalize_options_struct (&func_options);
30745
30746 return ret;
30747 }
30748
30749 /* Match an ISA feature bitmap to a named FPU. We always use the
30750 first entry that exactly matches the feature set, so that we
30751 effectively canonicalize the FPU name for the assembler. */
30752 static const char*
30753 arm_identify_fpu_from_isa (sbitmap isa)
30754 {
30755 auto_sbitmap fpubits (isa_num_bits);
30756 auto_sbitmap cand_fpubits (isa_num_bits);
30757
30758 bitmap_and (fpubits, isa, isa_all_fpubits);
30759
30760 /* If there are no ISA feature bits relating to the FPU, we must be
30761 doing soft-float. */
30762 if (bitmap_empty_p (fpubits))
30763 return "softvfp";
30764
30765 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30766 {
30767 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30768 if (bitmap_equal_p (fpubits, cand_fpubits))
30769 return all_fpus[i].name;
30770 }
30771 /* We must find an entry, or things have gone wrong. */
30772 gcc_unreachable ();
30773 }
30774
30775 void
30776 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30777 {
30778
30779 fprintf (stream, "\t.syntax unified\n");
30780
30781 if (TARGET_THUMB)
30782 {
30783 if (is_called_in_ARM_mode (decl)
30784 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30785 && cfun->is_thunk))
30786 fprintf (stream, "\t.code 32\n");
30787 else if (TARGET_THUMB1)
30788 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30789 else
30790 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30791 }
30792 else
30793 fprintf (stream, "\t.arm\n");
30794
30795 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30796 (TARGET_SOFT_FLOAT
30797 ? "softvfp"
30798 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30799
30800 if (TARGET_POKE_FUNCTION_NAME)
30801 arm_poke_function_name (stream, (const char *) name);
30802 }
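/* As a rough sketch, for a Thumb-2 function on a target with a VFP unit the
   directives emitted above look like

     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv3-d16

   with ".code 32"/".arm" variants for ARM-mode code and "softvfp" when
   soft-float is in effect (the FPU name shown is only an example).  */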
30803
30804 /* If MEM is in the form of [base+offset], extract the two parts
30805 of the address and store them in BASE and OFFSET; otherwise return false
30806 after clearing BASE and OFFSET.  */
30807
30808 static bool
30809 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30810 {
30811 rtx addr;
30812
30813 gcc_assert (MEM_P (mem));
30814
30815 addr = XEXP (mem, 0);
30816
30817 /* Strip off const from addresses like (const (addr)). */
30818 if (GET_CODE (addr) == CONST)
30819 addr = XEXP (addr, 0);
30820
30821 if (GET_CODE (addr) == REG)
30822 {
30823 *base = addr;
30824 *offset = const0_rtx;
30825 return true;
30826 }
30827
30828 if (GET_CODE (addr) == PLUS
30829 && GET_CODE (XEXP (addr, 0)) == REG
30830 && CONST_INT_P (XEXP (addr, 1)))
30831 {
30832 *base = XEXP (addr, 0);
30833 *offset = XEXP (addr, 1);
30834 return true;
30835 }
30836
30837 *base = NULL_RTX;
30838 *offset = NULL_RTX;
30839
30840 return false;
30841 }
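/* For illustration: given a MEM whose address is
     (plus (reg r3) (const_int 8))
   the function above returns true with *BASE = (reg r3) and
   *OFFSET = (const_int 8); a bare register address yields an offset of 0,
   and any other address form returns false.  */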
30842
30843 /* If INSN is a load or store of an address in the form of [base+offset],
30844 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
30845 to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
30846 otherwise return FALSE.  */
30847
30848 static bool
30849 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30850 {
30851 rtx x, dest, src;
30852
30853 gcc_assert (INSN_P (insn));
30854 x = PATTERN (insn);
30855 if (GET_CODE (x) != SET)
30856 return false;
30857
30858 src = SET_SRC (x);
30859 dest = SET_DEST (x);
30860 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30861 {
30862 *is_load = false;
30863 extract_base_offset_in_addr (dest, base, offset);
30864 }
30865 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30866 {
30867 *is_load = true;
30868 extract_base_offset_in_addr (src, base, offset);
30869 }
30870 else
30871 return false;
30872
30873 return (*base != NULL_RTX && *offset != NULL_RTX);
30874 }
30875
30876 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30877
30878 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30879 and PRI are only calculated for these instructions.  For other instructions,
30880 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
30881 instruction fusion can be supported by returning different priorities.
30882
30883 It's important that irrelevant instructions get the largest FUSION_PRI. */
30884
30885 static void
30886 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30887 int *fusion_pri, int *pri)
30888 {
30889 int tmp, off_val;
30890 bool is_load;
30891 rtx base, offset;
30892
30893 gcc_assert (INSN_P (insn));
30894
30895 tmp = max_pri - 1;
30896 if (!fusion_load_store (insn, &base, &offset, &is_load))
30897 {
30898 *pri = tmp;
30899 *fusion_pri = tmp;
30900 return;
30901 }
30902
30903 /* Load goes first. */
30904 if (is_load)
30905 *fusion_pri = tmp - 1;
30906 else
30907 *fusion_pri = tmp - 2;
30908
30909 tmp /= 2;
30910
30911 /* INSN with smaller base register goes first. */
30912 tmp -= ((REGNO (base) & 0xff) << 20);
30913
30914 /* INSN with smaller offset goes first. */
30915 off_val = (int)(INTVAL (offset));
30916 if (off_val >= 0)
30917 tmp -= (off_val & 0xfffff);
30918 else
30919 tmp += ((- off_val) & 0xfffff);
30920
30921 *pri = tmp;
30922 return;
30923 }
30924
30925
30926 /* Construct and return a PARALLEL RTX vector with elements numbering the
30927 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30928 the vector - from the perspective of the architecture. This does not
30929 line up with GCC's perspective on lane numbers, so we end up with
30930 different masks depending on our target endian-ness. The diagram
30931 below may help. We must draw the distinction when building masks
30932 which select one half of the vector. An instruction selecting
30933 architectural low-lanes for a big-endian target must be described using
30934 a mask selecting GCC high-lanes.
30935
30936 Big-Endian Little-Endian
30937
30938 GCC 0 1 2 3 3 2 1 0
30939 | x | x | x | x | | x | x | x | x |
30940 Architecture 3 2 1 0 3 2 1 0
30941
30942 Low Mask: { 2, 3 } { 0, 1 }
30943 High Mask: { 0, 1 } { 2, 3 }
30944 */
30945
30946 rtx
30947 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30948 {
30949 int nunits = GET_MODE_NUNITS (mode);
30950 rtvec v = rtvec_alloc (nunits / 2);
30951 int high_base = nunits / 2;
30952 int low_base = 0;
30953 int base;
30954 rtx t1;
30955 int i;
30956
30957 if (BYTES_BIG_ENDIAN)
30958 base = high ? low_base : high_base;
30959 else
30960 base = high ? high_base : low_base;
30961
30962 for (i = 0; i < nunits / 2; i++)
30963 RTVEC_ELT (v, i) = GEN_INT (base + i);
30964
30965 t1 = gen_rtx_PARALLEL (mode, v);
30966 return t1;
30967 }
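/* Worked example for the function above: for V4SImode (nunits == 4) on a
   little-endian target, HIGH selects lanes { 2, 3 } and !HIGH selects
   { 0, 1 }; on a big-endian target the two masks are swapped, as in the
   diagram above.  */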
30968
30969 /* Check OP for validity as a PARALLEL RTX vector with elements
30970 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30971 from the perspective of the architecture.  See the diagram above
30972 arm_simd_vect_par_cnst_half for more details.  */
30973
30974 bool
30975 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30976 bool high)
30977 {
30978 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30979 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30980 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30981 int i = 0;
30982
30983 if (!VECTOR_MODE_P (mode))
30984 return false;
30985
30986 if (count_op != count_ideal)
30987 return false;
30988
30989 for (i = 0; i < count_ideal; i++)
30990 {
30991 rtx elt_op = XVECEXP (op, 0, i);
30992 rtx elt_ideal = XVECEXP (ideal, 0, i);
30993
30994 if (!CONST_INT_P (elt_op)
30995 || INTVAL (elt_ideal) != INTVAL (elt_op))
30996 return false;
30997 }
30998 return true;
30999 }
31000
31001 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31002 in Thumb1. */
31003 static bool
31004 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31005 const_tree)
31006 {
31007 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31008 if (vcall_offset && TARGET_THUMB1)
31009 return false;
31010
31011 /* Otherwise ok. */
31012 return true;
31013 }
31014
31015 /* Generate RTL for a conditional branch with rtx comparison CODE in
31016 mode CC_MODE. The destination of the unlikely conditional branch
31017 is LABEL_REF. */
31018
31019 void
31020 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31021 rtx label_ref)
31022 {
31023 rtx x;
31024 x = gen_rtx_fmt_ee (code, VOIDmode,
31025 gen_rtx_REG (cc_mode, CC_REGNUM),
31026 const0_rtx);
31027
31028 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31029 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31030 pc_rtx);
31031 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31032 }
31033
31034 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31035
31036 For pure-code sections there is no letter code for this attribute, so
31037 output all the section flags numerically when this is needed. */
31038
31039 static bool
31040 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31041 {
31042
31043 if (flags & SECTION_ARM_PURECODE)
31044 {
31045 *num = 0x20000000;
31046
31047 if (!(flags & SECTION_DEBUG))
31048 *num |= 0x2;
31049 if (flags & SECTION_EXCLUDE)
31050 *num |= 0x80000000;
31051 if (flags & SECTION_WRITE)
31052 *num |= 0x1;
31053 if (flags & SECTION_CODE)
31054 *num |= 0x4;
31055 if (flags & SECTION_MERGE)
31056 *num |= 0x10;
31057 if (flags & SECTION_STRINGS)
31058 *num |= 0x20;
31059 if (flags & SECTION_TLS)
31060 *num |= 0x400;
31061 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31062 *num |= 0x200;
31063
31064 return true;
31065 }
31066
31067 return false;
31068 }
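/* Worked example: an ordinary pure-code text section (SECTION_CODE set,
   SECTION_DEBUG and the other flags clear) gets
     0x20000000 | 0x2 | 0x4 == 0x20000006
   i.e. SHF_ARM_PURECODE together with the allocatable and executable
   section flags.  */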
31069
31070 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31071
31072 If pure-code is passed as an option, make sure all functions are in
31073 sections that have the SHF_ARM_PURECODE attribute. */
31074
31075 static section *
31076 arm_function_section (tree decl, enum node_frequency freq,
31077 bool startup, bool exit)
31078 {
31079 const char * section_name;
31080 section * sec;
31081
31082 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31083 return default_function_section (decl, freq, startup, exit);
31084
31085 if (!target_pure_code)
31086 return default_function_section (decl, freq, startup, exit);
31087
31088
31089 section_name = DECL_SECTION_NAME (decl);
31090
31091 /* If a function is not in a named section then it falls under the 'default'
31092 text section, also known as '.text'. We can preserve previous behavior as
31093 the default text section already has the SHF_ARM_PURECODE section
31094 attribute. */
31095 if (!section_name)
31096 {
31097 section *default_sec = default_function_section (decl, freq, startup,
31098 exit);
31099
31100 /* If default_sec is not null, then it must be a special section like for
31101 example .text.startup. We set the pure-code attribute and return the
31102 same section to preserve existing behavior. */
31103 if (default_sec)
31104 default_sec->common.flags |= SECTION_ARM_PURECODE;
31105 return default_sec;
31106 }
31107
31108 /* Otherwise look whether a section has already been created with
31109 'section_name'. */
31110 sec = get_named_section (decl, section_name, 0);
31111 if (!sec)
31112 /* If that is not the case passing NULL as the section's name to
31113 'get_named_section' will create a section with the declaration's
31114 section name. */
31115 sec = get_named_section (decl, NULL, 0);
31116
31117 /* Set the SHF_ARM_PURECODE attribute. */
31118 sec->common.flags |= SECTION_ARM_PURECODE;
31119
31120 return sec;
31121 }
31122
31123 /* Implements the TARGET_SECTION_FLAGS hook.
31124
31125 If DECL is a function declaration and pure-code is passed as an option
31126 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31127 section's name and RELOC indicates whether the declaration's initializer may
31128 contain runtime relocations. */
31129
31130 static unsigned int
31131 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31132 {
31133 unsigned int flags = default_section_type_flags (decl, name, reloc);
31134
31135 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31136 flags |= SECTION_ARM_PURECODE;
31137
31138 return flags;
31139 }
31140
31141 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31142
31143 static void
31144 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31145 rtx op0, rtx op1,
31146 rtx *quot_p, rtx *rem_p)
31147 {
31148 if (mode == SImode)
31149 gcc_assert (!TARGET_IDIV);
31150
31151 scalar_int_mode libval_mode
31152 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31153
31154 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31155 libval_mode,
31156 op0, GET_MODE (op0),
31157 op1, GET_MODE (op1));
31158
31159 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31160 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31161 GET_MODE_SIZE (mode));
31162
31163 gcc_assert (quotient);
31164 gcc_assert (remainder);
31165
31166 *quot_p = quotient;
31167 *rem_p = remainder;
31168 }
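/* For illustration: with MODE == SImode the call above returns a DImode
   value holding both results; the quotient is extracted as the subreg at
   byte offset 0 and the remainder as the subreg at byte offset
   GET_MODE_SIZE (SImode) == 4, which lines up with the __aeabi_*divmod
   routines returning the { quotient, remainder } pair.  */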
31169
31170 /* This function checks for the availability of the coprocessor builtin passed
31171 in BUILTIN for the current target.  Returns true if it is available and
31172 false otherwise.  If a BUILTIN is passed for which this function has not
31173 been implemented, it will cause an internal compiler error (gcc_unreachable).  */
31174
31175 bool
31176 arm_coproc_builtin_available (enum unspecv builtin)
31177 {
31178 /* None of these builtins are available in Thumb mode if the target only
31179 supports Thumb-1. */
31180 if (TARGET_THUMB1)
31181 return false;
31182
31183 switch (builtin)
31184 {
31185 case VUNSPEC_CDP:
31186 case VUNSPEC_LDC:
31187 case VUNSPEC_LDCL:
31188 case VUNSPEC_STC:
31189 case VUNSPEC_STCL:
31190 case VUNSPEC_MCR:
31191 case VUNSPEC_MRC:
31192 if (arm_arch4)
31193 return true;
31194 break;
31195 case VUNSPEC_CDP2:
31196 case VUNSPEC_LDC2:
31197 case VUNSPEC_LDC2L:
31198 case VUNSPEC_STC2:
31199 case VUNSPEC_STC2L:
31200 case VUNSPEC_MCR2:
31201 case VUNSPEC_MRC2:
31202 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31203 ARMv8-{A,M}. */
31204 if (arm_arch5)
31205 return true;
31206 break;
31207 case VUNSPEC_MCRR:
31208 case VUNSPEC_MRRC:
31209 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31210 ARMv8-{A,M}. */
31211 if (arm_arch6 || arm_arch5te)
31212 return true;
31213 break;
31214 case VUNSPEC_MCRR2:
31215 case VUNSPEC_MRRC2:
31216 if (arm_arch6)
31217 return true;
31218 break;
31219 default:
31220 gcc_unreachable ();
31221 }
31222 return false;
31223 }
31224
31225 /* This function returns true if OP is a valid memory operand for the ldc and
31226 stc coprocessor instructions and false otherwise. */
31227
31228 bool
31229 arm_coproc_ldc_stc_legitimate_address (rtx op)
31230 {
31231 HOST_WIDE_INT range;
31232 /* Has to be a memory operand. */
31233 if (!MEM_P (op))
31234 return false;
31235
31236 op = XEXP (op, 0);
31237
31238 /* We accept registers. */
31239 if (REG_P (op))
31240 return true;
31241
31242 switch (GET_CODE (op))
31243 {
31244 case PLUS:
31245 {
31246 /* Or registers with an offset. */
31247 if (!REG_P (XEXP (op, 0)))
31248 return false;
31249
31250 op = XEXP (op, 1);
31251
31252 /* The offset must be an immediate though. */
31253 if (!CONST_INT_P (op))
31254 return false;
31255
31256 range = INTVAL (op);
31257
31258 /* Within the range of [-1020,1020]. */
31259 if (!IN_RANGE (range, -1020, 1020))
31260 return false;
31261
31262 /* And a multiple of 4. */
31263 return (range % 4) == 0;
31264 }
31265 case PRE_INC:
31266 case POST_INC:
31267 case PRE_DEC:
31268 case POST_DEC:
31269 return REG_P (XEXP (op, 0));
31270 default:
31271 gcc_unreachable ();
31272 }
31273 return false;
31274 }
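/* For illustration: an address such as (plus (reg r2) (const_int 1020)) is
   accepted by the check above, whereas offsets of 1022 (not a multiple of 4)
   or 1024 (outside [-1020, 1020]) are rejected; plain registers and
   pre/post increment or decrement of a register are also accepted.  */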
31275
31276 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31277
31278 In VFPv1, VFP registers could only be accessed in the mode they were
31279 set, so subregs would be invalid there. However, we don't support
31280 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31281
31282 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31283 VFP registers in little-endian order. We can't describe that accurately to
31284 GCC, so avoid taking subregs of such values.
31285
31286 The only exception is going from a 128-bit to a 64-bit type. In that
31287 case the data layout happens to be consistent for big-endian, so we
31288 explicitly allow that case. */
31289
31290 static bool
31291 arm_can_change_mode_class (machine_mode from, machine_mode to,
31292 reg_class_t rclass)
31293 {
31294 if (TARGET_BIG_END
31295 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31296 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31297 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31298 && reg_classes_intersect_p (VFP_REGS, rclass))
31299 return false;
31300 return true;
31301 }
31302
31303 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31304 strcpy from constants will be faster. */
31305
31306 static HOST_WIDE_INT
31307 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31308 {
31309 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31310 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31311 return MAX (align, BITS_PER_WORD * factor);
31312 return align;
31313 }
31314
31315 #if CHECKING_P
31316 namespace selftest {
31317
31318 /* Scan the static data tables generated by parsecpu.awk looking for
31319 potential issues with the data. We primarily check for
31320 inconsistencies in the option extensions at present (extensions
31321 that duplicate others but aren't marked as aliases). Furthermore,
31322 for correct canonicalization, later options must never be a subset
31323 of an earlier option. Any extension should also only specify other
31324 feature bits and never an architecture bit. The architecture is inferred
31325 from the declaration of the extension. */
31326 static void
31327 arm_test_cpu_arch_data (void)
31328 {
31329 const arch_option *arch;
31330 const cpu_option *cpu;
31331 auto_sbitmap target_isa (isa_num_bits);
31332 auto_sbitmap isa1 (isa_num_bits);
31333 auto_sbitmap isa2 (isa_num_bits);
31334
31335 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31336 {
31337 const cpu_arch_extension *ext1, *ext2;
31338
31339 if (arch->common.extensions == NULL)
31340 continue;
31341
31342 arm_initialize_isa (target_isa, arch->common.isa_bits);
31343
31344 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31345 {
31346 if (ext1->alias)
31347 continue;
31348
31349 arm_initialize_isa (isa1, ext1->isa_bits);
31350 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31351 {
31352 if (ext2->alias || ext1->remove != ext2->remove)
31353 continue;
31354
31355 arm_initialize_isa (isa2, ext2->isa_bits);
31356 /* If the option is a subset of the parent option, it doesn't
31357 add anything and so isn't useful. */
31358 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31359
31360 /* If the extension specifies any architectural bits then
31361 disallow it. Extensions should only specify feature bits. */
31362 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31363 }
31364 }
31365 }
31366
31367 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31368 {
31369 const cpu_arch_extension *ext1, *ext2;
31370
31371 if (cpu->common.extensions == NULL)
31372 continue;
31373
31374 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31375
31376 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31377 {
31378 if (ext1->alias)
31379 continue;
31380
31381 arm_initialize_isa (isa1, ext1->isa_bits);
31382 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31383 {
31384 if (ext2->alias || ext1->remove != ext2->remove)
31385 continue;
31386
31387 arm_initialize_isa (isa2, ext2->isa_bits);
31388 /* If the option is a subset of the parent option, it doesn't
31389 add anything and so isn't useful. */
31390 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31391
31392 /* If the extension specifies any architectural bits then
31393 disallow it. Extensions should only specify feature bits. */
31394 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31395 }
31396 }
31397 }
31398 }
31399
31400 /* Scan the static data tables generated by parsecpu.awk looking for
31401 potential issues with the data.  Here we check the consistency of the
31402 FPU bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
31403 a feature bit that is not defined by any FPU flag.  */
31404 static void
31405 arm_test_fpu_data (void)
31406 {
31407 auto_sbitmap isa_all_fpubits (isa_num_bits);
31408 auto_sbitmap fpubits (isa_num_bits);
31409 auto_sbitmap tmpset (isa_num_bits);
31410
31411 static const enum isa_feature fpu_bitlist[]
31412 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31413 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31414
31415 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31416 {
31417 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31418 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31419 bitmap_clear (isa_all_fpubits);
31420 bitmap_copy (isa_all_fpubits, tmpset);
31421 }
31422
31423 if (!bitmap_empty_p (isa_all_fpubits))
31424 {
31425 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31426 " group that are not defined by any FPU.\n"
31427 " Check your arm-cpus.in.\n");
31428 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31429 }
31430 }
31431
31432 static void
31433 arm_run_selftests (void)
31434 {
31435 arm_test_cpu_arch_data ();
31436 arm_test_fpu_data ();
31437 }
31438 } /* Namespace selftest. */
31439
31440 #undef TARGET_RUN_TARGET_SELFTESTS
31441 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31442 #endif /* CHECKING_P */
31443
31444 struct gcc_target targetm = TARGET_INITIALIZER;
31445
31446 #include "gt-arm.h"